[Oberon] n-o: Rx.Tool ? span multiple lines
edgar at edgarschwarz.de
edgar at edgarschwarz.de
Fri Oct 31 23:49:13 CET 2003
Hi Chris,
> I'm getting emails like this (from win users) :-----------
> <p class=3DMsoPlainText><font size=3D2 face=3D"Courier New"><span lang=3D=
> EN-ZA
> style=3D'font-size:10.0pt'>
> I have made some remarks for you. You are welc=
> ome to
> phone me anytime.<o:p></o:p></span></font></p>
> -----------
> And it is politically incorrect for me to tell the sender to "do it properly".
> I suspect they feel they must have a spell checker available ?
I'm just thinking about extracting information from something like:
<html>
<head>
<title>Clusterball - The Best Internet Game</title>
<base target="_self">
<style>table {font-size: 8pt}</style>
<link rel="stylesheet" type="text/css" href="cb_chapters.css">
</head>
<body>
<h3>History:</h3>
<hr>
<table border='0'><tr><td><img border='0' src='http://clusterball/images/wi_china.jpg'>
</td><td><font size='2'>Date: 2003-08-03 01:20:49<br>Venue: China<br>Time: 10 min<br>
Players: 7<br></tr></table><table border='0' width='75%'><tr><td bgcolor=#EDEDED
align='center'><font size='1'><b>Placing</b></td><td bgcolor=#EDEDED><font size='1'>
<b>Nick</b></td><td bg ...
So we have a similar problem at the moment.
I now do it like this:
CONST
  (* Additional token classes reported in Scanner.class by Scan below. *)
  TagClass* = 8;  (* ls holds the text found between '<' and '>' *)
  ArrClass* = 9;  (* ls holds the characters found between two tags *)

  (* Characters treated as whitespace / line ends by Scan. *)
  TAB = 9X;
  CR = 0DX;
  LF = 0AX;

TYPE
  (* Texts.Scanner extended with a long string buffer; Scan uses the
     inherited nextCh field as its look-ahead character. *)
  Scanner* = RECORD (Texts.Scanner)
    ls*: ARRAY 1024 OF CHAR;  (* long string: 0X-terminated token text *)
    llen*: INTEGER;  (* long len: number of characters stored in ls *)
  END;
(* Scan reads the next token from S.
   - If the look-ahead character S.nextCh is '<', the characters up to (but
     not including) the closing '>' are copied into S.ls and S.class is set
     to TagClass.
   - Otherwise, if the previous token was a tag (S.class = TagClass), the
     characters up to the next '<' are copied into S.ls and S.class is set
     to ArrClass; S.nextCh is left on that '<'.
   - Otherwise the call is delegated to Texts.Scan.
   Leading blanks, TABs, LFs and CRs are skipped first (CR increments S.line).
   S.ls is always 0X-terminated and holds at most LEN(S.ls)-1 characters;
   anything longer is dropped. S.llen is the number of characters stored.
   Both copy loops stop at end of text (S.eot), so an unterminated tag or
   trailing text yields the characters actually read. *)
PROCEDURE Scan*(VAR S: Scanner);
  VAR i: LONGINT; ch: CHAR;
BEGIN i := 0;
  LOOP (* skip whitespace *)
    IF (S.lib = NIL) OR ~(S.lib IS Fonts.Font) THEN EXIT
    ELSIF S.nextCh = CR THEN INC(S.line)
    ELSIF (S.nextCh # " ") & (S.nextCh # TAB) & (S.nextCh # LF) THEN EXIT
    END ;
    Texts.Read(S, S.nextCh)
  END;
  IF S.nextCh = '<' THEN (* Read a tag '< ..... >' *)
    Texts.Read(S, ch);
    (* ~S.eot: do not keep reading (and storing 0X) past the end of the text. *)
    WHILE ~S.eot & (ch # '>') & (i # LEN(S.ls)-1) DO
      S.ls[i] := ch; INC(i); Texts.Read(S, ch)
    END;
    (* Drop rest of tag if too long. The NIL test guards the type test,
       exactly as the whitespace loop above does. *)
    WHILE ~S.eot & (ch # '>') & (S.lib # NIL) & (S.lib IS Fonts.Font) DO
      Texts.Read(S, ch)
    END;
    S.ls[i] := 0X; S.llen := SHORT(i);
    Texts.Read(S, S.nextCh); (* step over the closing '>' *)
    S.class := TagClass;
  ELSIF S.class = TagClass THEN (* Read char array after a tag until next tag. *)
    ch := S.nextCh;
    WHILE ~S.eot & (ch # '<') & (i # LEN(S.ls)-1) DO
      S.ls[i] := ch; INC(i); Texts.Read(S, ch)
    END;
    (* Drop rest of text if too long. *)
    WHILE ~S.eot & (ch # '<') & (S.lib # NIL) & (S.lib IS Fonts.Font) DO
      Texts.Read(S, ch)
    END;
    S.ls[i] := 0X; S.llen := SHORT(i);
    S.nextCh := ch; (* keep the '<' as look-ahead so the next call reads the tag *)
    S.class := ArrClass;
  ELSE (* Do normal scan. *)
    Texts.Scan(S);
  END;
END Scan;
(* ScanCb: command that opens the text whose name is read with In.Name and
   walks over it token by token with Scan.
   - For every tag containing "h.php?cid=" the tag text is printed, a fixed
     number of following tokens is skipped, and the token after them is
     printed as an integer (or "Error" if it is not one).
   - Every text run between tags that starts with "Time: ", "Players: " or
     "Date: " is printed on a line of its own. *)
PROCEDURE ScanCb*();
  VAR
    S: Scanner;
    cbName: ARRAY 32 OF CHAR;
    t: Texts.Text;
    k: INTEGER;
    wanted: BOOLEAN;
BEGIN
  In.Open; In.Name(cbName);
  NEW(t); Texts.Open(t, cbName);
  Texts.OpenScanner(S, t, 0);
  Scan(S);
  WHILE ~S.eot DO
    IF S.class = TagClass THEN
      IF Utilities.Pos("h.php?cid=", S.ls) >= 0 THEN
        Out.String(S.ls);
        (* Skip four groups of four tokens each:
           rest of player, rank, title, Exp. *)
        FOR k := 1 TO 16 DO Scan(S) END;
        (* Two more tokens, then read the value with the plain text scanner. *)
        Scan(S); Scan(S);
        Texts.Scan(S);
        IF S.class = Texts.Int THEN
          Out.Int(S.i, 5); Out.Ln
        ELSE
          Out.String("Error"); Out.Ln
        END
      END
    ELSIF S.class = ArrClass THEN
      (* Get some interesting data: only runs that begin with one of these labels. *)
      wanted := (Utilities.Pos("Time: ", S.ls) = 0)
        OR (Utilities.Pos("Players: ", S.ls) = 0)
        OR (Utilities.Pos("Date: ", S.ls) = 0);
      IF wanted THEN
        Out.String(S.ls); Out.Ln
      END
    END;
    Scan(S)
  END
END ScanCb;
Just a little hack of two hours. Perhaps you can use something similar.
Cheers, Edgar
--
edgar at edgarschwarz.de "http://www.edgarschwarz.de"
"http://www.edgar-schwarz.de/forum/oberon" Running Active Oberon
Make it as simple as possible, but not simpler. Albert Einstein
More information about the Oberon
mailing list