program gisela;
%include '/sys/ins/base.ins.pas';
const
max_class = 20;
max_commands = 10;
unknown = 0;
ask = 1;
special = max_class;
verb = 2;
noun = 3;
adj = 4;
adv = 5;
prep = 6;
pron = 7;
count= 8;
conj = 9;
subj =10;
intj =11;
art =12;
infm =13;
intrans = 2;
trans = 3;
mass = 4;
pers = 5;
rel = 6;
dem = 7;
(*count = 8;*)
quest = 9;
neg =10;
aux =11;
part =12;
base = 2;
inf = 3;
pret = 4;
pres = 5;
sdef = 6;
pldef = 7;
sidef = 8;
plidef = 9;
sing =10;
plur =11;
c_read = 1;
c_write = 2;
c_define = 3;
c_select = 4;
c_show = 5;
c_quit = 6;
c_exit = 7;
c_clear = 8;
c_sort = 9;
c_help = 10;
p_lexicon = 1;
p_corpus = 2;
p_word = 3;
p_context = 4;
p_structure = 5;
p_class = 6;
p_command = 7;
p_histogram = 8;
p_selection = 9;
p_sentence = 10;
p_ending = 11;
p_subclause = 12;
p_wordorder = 13;
p_subwo = 14;
o_class = 1;
o_subclass = 2;
o_morph = 3;
o_root = 4;
o_ending = 5;
o_ordered = 6;
o_initial = 7;
o_wordorder = 8;
sv = 1;
vs = 2;
v1 = 3;
vns = 4;
vsn = 5;
nvs = 6;
snv = 7;
svn = 8;
nsv = 9;
v2 = 10;
vn = 11;
nv = 12;
sentb = 1;
wordb = 2;
word2b =3;
type word_ptr = ^word_type;
word2_ptr = ^word2_type;
sentence_ptr = ^sentence_type;
sentb_ptr = ^sentb_type;
wordb_ptr = ^wordb_type;
word2b_ptr = ^word2b_type;
list_type = array[1..255] of integer;
shortlist_type = array[0..15] of integer;
word_type = record
word : string;
class : integer;
subclass : integer;
morph : integer;
count_as : word_ptr;
ending : word_ptr;
below, above : word_ptr;
occurence : word2_ptr;
n_occur : integer;
end;
word2_type = record
word : word_ptr;
class : integer;
subclass : integer;
morph : integer;
next, occurence : word2_ptr;
sentence : sentence_ptr;
flag : integer;
end;
sentence_type = record
words : word2_ptr;
nwords: integer;
nclause: integer;
word_order : shortlist_type;
phrase_type : shortlist_type;
cstop : char;
next : sentence_ptr;
end;
name_array = array[0..max_class] of string;
hist_ptr = ^hist_type;
hist_type = record
nch : integer;
title : string;
xmin,xmax,bw : real;
overflow,underflow : integer;
data : array[0..100] of integer;
lab : ^name_array;
end;
var s,s1,s2:string;
blank,line: string;
swedish_chars : array[1..6] of char;
icommand : integer;
cline : array[1..max_commands] of string;
lowercase : set of char;
word_root,pword,endroot : word_ptr;
current_sent : sentence_ptr;
class_name,subclass_name,morph_name : name_array;
command_name,p_name,sentence_name,wordorder_name : name_array;
option_name : array[0..max_class] of char;
(**phrase_type : array[0..99] of string;**)
nwords,oldindex : integer;
nwtot,nsent,nwsent,wleng,wlsum : integer;
flag : integer;
i,j,k,l,m,n : integer;
jline,ind : integer;
infile : text;
cfile : text;
status : integer32;
random_seed : linteger;
prob : array[1..255] of real;
xnorm,ran,xk : real;
quit : boolean;
hist : array[1..100] of hist_ptr;
sent_root : sentence_ptr;
word2_root : word2_ptr;
sel_words : array[1..3] of word_ptr;
nsel_words : integer;
sel_phrase_type,sel_sub_type,sel_wordorder : integer;
sel_ordered,sel_initial : boolean;
sel_wo,sel_subwo : integer;
function string_to_int(s:string):integer;
var i,j,k : integer;
begin
k := 0;
for i := 1 to 80 do
if ( s[i] in ['0'..'9'] ) then
k := 10*k + ord(s[i])-ord('0');
string_to_int := k;
end; (** string_to_int ***)
procedure new_sent(var sent:sentence_ptr);
var ss,ssold:sentence_ptr;
begin
ss := sent_root;
while ( ss^.next <> nil ) do
ss := ss^.next;
new(ss^.next);
ss := ss^.next;
ss^.next := nil;
sent := ss;
end;
procedure new_word2(var word2:word2_ptr);
var ss,ssold:word2_ptr;
begin
ss := word2_root;
while ( ss^.next <> nil ) do
ss := ss^.next;
new(ss^.next);
ss := ss^.next;
ss^.next := nil;
word2 := ss;
end;
procedure hbook1(id:integer;tit:string;ncha:integer;xxmin,xxmax:real);
var i,j : integer;
begin
if (( id <= 0 ) or ( id > 100 )) then
writeln('Hist id must be between 1 and 100!')
else if ( hist[id] <> nil ) then
writeln('Hist ',id,' already exists.')
else if (( ncha <= 0 ) or ( ncha > 100 )) then
writeln('Hist # channels must be between 1 and 100!')
else
begin
new(hist[id]);
with hist[id]^ do
begin
nch := ncha;
title := tit;
xmin := xxmin;
xmax := xxmax;
bw := (xmax-xmin)/nch;
overflow := 0;
underflow := 0;
for i := 1 to 100 do
data[i] := 0;
end;
end;
end; (*** hbook1 ***)
procedure hfill(id:integer;x:real);
var i,j : integer;
begin
if (( id <= 0 ) or ( id > 100 )) then
writeln('Hist id must be between 1 and 100!')
else
with hist[id]^ do
begin
if ( x < xmin ) then
underflow := underflow + 1
else if ( x >= xmax ) then
overflow := overflow + 1
else
begin
i := trunc((x-xmin)/bw) + 1;
data[i] := data[i]+1;
end;
end;
end; (*** hfill ***)
procedure hfilli(id:integer;ix:integer);
var x : real;
begin
x := ix;
hfill(id,x);
end; (*** hfilli ***)
procedure hprint(id:integer);
var i,j,k,m,n : integer;
cmax,hscale,cpot,csum : integer;
height : array[1..100] of integer;
sx,sx2 : real;
x,mean,sigma : real;
begin
if ( id = 0 ) then
begin
for i := 1 to 100 do
if ( hist[i] <> nil ) then
hprint(i)
end
else if (( id < 0 ) or ( id > 100 )) then
writeln('Hist id must be between 1 and 100!')
else if ( hist[id] = nil ) then
writeln('Hist id ',id:5,' does not exist.')
else
with hist[id]^ do
begin
writeln('--------------------------------------------------------');
writeln(' Histogram ',id:3,' : ',title);
writeln('--------------------------------------------------------');
cmax := 0;
for i := 1 to nch do
if ( data[i] > cmax ) then
cmax := data[i];
if ( cmax = 0 ) then
writeln('Empty histogram.')
else
begin
if ( cmax < 40 ) then
hscale := 1
else
hscale := cmax div 40 + 1;
for j := (cmax div hscale)+1 downto 1 do
begin
write(j*hscale:4,'|');
for i := 1 to nch do
if ( data[i] >= j*hscale ) then
write('#')
else if ( data[i] > (j-1)*hscale ) then
write('+')
else
write(' ');
writeln;
end;
writeln('--------------------------------------------------------');
write('Ch# :');
for i := 1 to nch do
if ( (i mod 10) = 0 ) then
write((i div 10):1)
else
write(' ');
writeln;
write(' ');
for i := 1 to nch do
write((i mod 10):1);
writeln;
writeln('--------------------------------------------------------');
if ( cmax >= 10000 ) then
writeln('Overflow in contents')
else if ( cmax >= 1000 ) then
k := 3
else if ( cmax >= 100 ) then
k := 2
else if ( cmax >= 10 ) then
k := 1
else
k := 0;
for j := k downto 0 do
begin
cpot := 1;
for i := 1 to j do
cpot := 10*cpot;
write(cpot:4,':');
for i := 1 to nch do
if ( data[i] >= cpot ) then
write(((data[i] mod (10*cpot)) div cpot):1)
else
write(' ');
writeln;
end;
writeln;
writeln('--------------------------------------------------------');
writeln('Overflow : ',overflow:7,'; Underflow : ',underflow:7);
sx := 0.0;
sx2 := 0.0;
csum := 0;
for i := 1 to nch do
begin
x := xmin+(i-0.5)*bw;
sx := sx + x*data[i];
sx2:= sx2+ x*x*data[i];
csum := csum + data[i];
end;
writeln('Total contents : ',csum+overflow+underflow:7);
if ( csum > 0 ) then
begin
mean := sx/csum;
sigma := sqrt((sx2 - csum*mean*mean)/csum);
writeln('Average : ',mean:10:3,'; Standard deviation : ',sigma:10:3);
end;
writeln('--------------------------------------------------------');
end;
end;
end; (*** hprint ***)
procedure hptab(id:integer; labels:name_array);
var i,j,k : integer;
cmax,hscale : integer;
height : array[1..100] of integer;
begin
if (( id <= 0 ) or ( id > 100 )) then
writeln('Hist id must be between 1 and 100!')
else if ( hist[id] = nil ) then
writeln('Hist id ',id:5,' does not exist.')
else
with hist[id]^ do
begin
writeln('--------------------------------------------------------');
writeln(' Histogram ',id:3,' : ',title);
writeln('--------------------------------------------------------');
cmax := 0;
for i := 1 to nch do
if ( data[i] > cmax ) then
cmax := data[i];
if ( cmax = 0 ) then
writeln('Empty histogram.')
else
begin
if ( cmax < 40 ) then
hscale := 1
else
hscale := cmax div 40 + 1;
for i := 1 to nch do
begin
write(i:2,': ',labels[i]:30,' |',data[i]:5,'|');
for j := 1 to (data[i] div hscale) do
write('#');
writeln;
end;
writeln('--------------------------------------------------------');
end;
end;
end; (*** hptab ***)
procedure hreset(id:integer);
var i,j,k : integer;
begin
if ( id = 0 ) then
begin
for i := 1 to 100 do
if ( hist[i] <> nil ) then
hreset(i)
end
else if (( id < 0 ) or ( id > 100 )) then
writeln('Hist id must be between 1 and 100!')
else if ( hist[id] = nil ) then
writeln('Hist id ',id:5,' does not exist.')
else
with hist[id]^ do
begin
for i := 1 to nch do
data[i] := 0;
overflow := 0;
underflow := 0;
end;
end; (*** hreset ****)
procedure add_node(ptr:word_ptr;s:string);
begin
nwords := nwords+1;
with ptr^ do
begin
word := s;
below := nil;
above := nil;
occurence := nil;
count_as := nil;
ending := nil;
n_occur := 0;
class := unknown;
subclass := unknown;
morph := unknown;
end;
end; (*** add_node ***)
procedure find_in_list(s:string; sleng:integer; var list : name_array; var index:integer);
var i,j : integer;
match : array[0..max_class] of boolean;
begin
for i := 0 to max_class do
match[i] := true;
for i := 1 to sleng do
for j := 0 to max_class do
if ( s[i] <> list[j][i] ) then
match[j] := false;
j := 0;
for i := 0 to max_class do
if ( match[i] ) then
begin
j := j+1;
index := i;
end;
if ( j = 0 ) then
index := -1
else if ( j > 1 ) then
index := -2;
end; (*** find_in_list ***)
procedure init;
var i : integer;
c : char;
begin
swedish_chars := 'åäöÅÄÖ';
lowercase := ['0'..'9','a'..'z'];
for i := 1 to 3 do
lowercase := lowercase + [swedish_chars[i]];
for i := 1 to 80 do
blank[i] := ' ';
nwords := 0;
sent_root := nil;
word_root := nil;
word2_root := nil;
s := 'lroot';
new(word_root);
add_node(word_root,s);
new(endroot);
s := 'lroot';
add_node(endroot,s);
new(sent_root);
with sent_root^ do
begin
words := nil;
nwords := 0;
nclause := 0;
phrase_type[0] := 19;
next := nil;
end;
current_sent := sent_root;
class_name[unknown] := 'unknown';
for i := 0 to max_class do
begin
class_name[i] := class_name[unknown];
subclass_name[i] := class_name[unknown];
morph_name[i] := class_name[base];
command_name[i] := '????????';
p_name[i] := '????????';
option_name[i] := '?';
sentence_name[i] := '????????';
wordorder_name[i] := '????????';
end;
class_name[ask] := 'ask';
class_name[special] := 'special';
class_name[verb] := 'verb';
class_name[noun] := 'noun';
class_name[adj ] := 'adjective';
class_name[adv ] := 'adverb';
class_name[prep] := 'preposition';
class_name[pron] := 'pronoun';
class_name[count]:= 'count';
class_name[conj] := 'conjunction';
class_name[subj] := 'subjunction';
class_name[intj] := 'interjection';
class_name[art] := 'article';
subclass_name[ask] := 'ask';
subclass_name[special] := 'special';
subclass_name[intrans] := 'intransitive';
subclass_name[trans ] := 'transitive';
subclass_name[mass ] := 'mass';
subclass_name[count ] := 'count';
subclass_name[pers ] := 'personal';
subclass_name[rel ] := 'relative';
subclass_name[dem ] := 'demonstrative';
subclass_name[quest ] := 'interrogative';
subclass_name[neg ] := 'negative';
subclass_name[aux ] := 'auxiliary';
subclass_name[part ] := 'participle';
morph_name[ask] := 'ask';
morph_name[special] := 'special';
morph_name[base ] := 'base';
morph_name[inf ] := 'infinitive';
morph_name[pret ] := 'preteritum';
morph_name[pres ] := 'present';
morph_name[sdef ] := 'sdef';
morph_name[pldef ] := 'pldef';
morph_name[sidef ] := 'sidef';
morph_name[plidef ] := 'plidef';
morph_name[sing ] := 'singular';
morph_name[plur ] := 'plural';
command_name[c_read ] := 'read';
command_name[c_write ] := 'write';
command_name[c_define] := 'define';
command_name[c_select] := 'select';
command_name[c_show ] := 'show';
command_name[c_quit ] := 'quit';
command_name[c_exit ] := 'exit';
command_name[c_clear ] := 'clear';
command_name[c_sort ] := 'sort';
command_name[c_help ] := 'help';
p_name[p_lexicon ] := 'lexicon';
p_name[p_corpus ] := 'corpus';
p_name[p_word ] := 'words';
p_name[p_context ] := 'context';
p_name[p_structure] := 'structure';
p_name[p_class ] := 'class';
p_name[p_command ] := 'command';
p_name[p_histogram] := 'histogram';
p_name[p_selection] := 'selection';
p_name[p_sentence ] := 'sentence';
p_name[p_ending ] := 'ending';
p_name[p_subclause] := 'subclause';
p_name[p_wordorder] := 'wordorder';
p_name[p_subwo ] := 'subwordorder';
option_name[o_class ] := 'c';
option_name[o_subclass] := 's';
option_name[o_morph ] := 'm';
option_name[o_root ] := 'r';
option_name[o_ending ] := 'e';
option_name[o_ordered ] := 'o';
option_name[o_initial ] := 'i';
sentence_name[0] := 'Unknown';
sentence_name[1] := 'One finite verb, no neg';
sentence_name[2] := 'Auxiliary + infinitive, no neg';
sentence_name[3] := 'Auxiliary + participle, no neg';
sentence_name[4] := 'One finite verb, neg before';
sentence_name[5] := 'One finite verb, neg after';
sentence_name[6] := 'Auxiliary + inf/part, neg before aux';
sentence_name[7] := 'Auxiliary + inf/part, neg between verbs';
sentence_name[8] := 'Auxiliary + inf/part, neg after verbs';
sentence_name[9] := 'Main + sub';
sentence_name[10]:= 'Complex verbs, no neg';
sentence_name[11]:= 'Complex verbs, with neg';
sentence_name[12]:= 'Anomalous';
sentence_name[13]:= 'Only infinite verb, neg before';
sentence_name[14]:= 'Only infinite verb, neg after';
sentence_name[15]:= 'Multiple subs';
sentence_name[16]:= 'Anomalous';
sentence_name[17]:= 'Anomalous';
sentence_name[18]:= 'Anomalous';
sentence_name[19]:= 'Undefined';
sentence_name[20]:= 'Anomalous';
wordorder_name[sv] := 'sv.';
wordorder_name[vs] := 'vs.';
wordorder_name[v1] := 'v1';
wordorder_name[v2] := 'v2';
wordorder_name[vns] := 'vns';
wordorder_name[vsn] := 'vsn';
wordorder_name[nvs] := 'nvs';
wordorder_name[snv] := 'snv';
wordorder_name[svn] := 'svn';
wordorder_name[nsv] := 'nsv';
wordorder_name[nv] := 'nv.';
wordorder_name[vn] := 'vn.';
quit := false;
icommand := 0;
for i := 1 to max_commands do
cline[i] := blank;
for i := 1 to 100 do
hist[i] := nil;
hbook1(1,'Word length',20,0.5,20.5);
hbook1(2,'Sentence length',40,0.5,40.5);
hbook1(3,'Word classes',20,0.5,20.5);
hbook1(4,'Sentence types',20,0.5,20.5);
hbook1(5,'Subclause types',20,0.5,20.5);
hbook1(6,'# clauses in sentence',20,0.5,20.5);
hbook1(7,'# words in corpus',40,0.0,800.0);
hbook1(8,'Word order (sv/vs)',20,0.5,20.5);
hbook1(9,'# finite verbs',20,-0.5,19.5);
sel_phrase_type := -1;
sel_sub_type := -1;
nsel_words := 0;
sel_ordered := false;
sel_initial := false;
sel_wo := -1;
sel_subwo := -1;
end; (*** init ***)
procedure write_word(s:string);
var i : integer;
begin
for i := 1 to 80 do
if ( s[i] > ' ' ) then
begin
write(s[i]);
k := k+1;
end;
write(' ');
end; (*** write_word ***)
procedure clean_word(var s:string; var wleng,flag:integer; var cstop:char);
var i,j,k,kk : integer;
c : char;
begin
k := 0;
c := ' ';
flag := 0;
for i := 1 to 80 do
if ( s[i] <> ' ' ) then
begin
k := k+1;
if ( s[i] in ['A'..'Z'] ) then
s[i] := chr(ord(s[i])+32)
else if ( s[i] = swedish_chars[4] ) then
s[i] := swedish_chars[1]
else if ( s[i] = swedish_chars[5] ) then
s[i] := swedish_chars[2]
else if ( s[i] = swedish_chars[6] ) then
s[i] := swedish_chars[3]
else if not ( s[i] in lowercase ) then
begin
c := s[i];
s[i] := ' ';
j := i;
end;
end;
if ( c <> ' ' ) and ( j = k ) then
begin
case c of
'.' : flag := 1;
'?' : flag := 1;
'!' : flag := 1;
',' : flag := 2;
';' : flag := 2;
':' : flag := 2;
')' : flag := 3;
end;
cstop := c;
end;
kk := k;
i := 1;
for i := 1 to k do
while ( s[i] = ' ' ) and ( i <= kk ) do
begin
for j := i+1 to k do
s[j-1] := s[j];
s[k] := ' ';
kk := kk-1;
end;
wleng := kk;
end; (*** clean_word ***)
procedure extract_word(line:string; var jline:integer; var s:string);
var i,j1,j2 : integer;
begin
s := blank;
j1 := jline;
while ( j1 < 80 ) and ( line[j1] = ' ' ) do
j1 := j1+1;
if ( j1 = 80 ) then
jline := -999
else
begin
j2 := j1;
while ( j2 < 80 ) and ( line[j2] <> ' ' ) do
j2 := j2+1;
jline := j2;
j2 := j2-1;
for i := j1 to j2 do
s[i-j1+1] := line[i];
end;
end; (*** extract_word ***)
procedure special_word(ppp:word2_ptr);
var i,j,k : integer;
pp : word2_ptr;
begin
if ( ppp^.word^.word = 'att' ) then
begin
ppp^.class := unknown;
pp := ppp^.next;
while ( pp <> nil ) do
begin
if ( pp^.class = verb ) then
begin
if ( pp^.morph = inf ) then
ppp^.class := infm
else
ppp^.class := subj;
pp := nil;
end
else
pp := pp^.next;
end;
end
else if ( ppp^.word^.word = 'det' ) or ( ppp^.word^.word = 'detta' ) then
begin
ppp^.class := pron;
pp := ppp^.next;
while ( pp <> nil ) do
begin
if ( pp^.class = verb ) then
pp := nil
else if ( pp^.class = noun ) then
begin
ppp^.class := art;
pp := nil;
end
else
pp := pp^.next;
end;
end;
end; (***** special_word ******)
procedure ask_word(pp:word2_ptr);
begin
end; (***** ask_word ******)
procedure find_in_tree(s:string;start_ptr:word_ptr; var pword:word_ptr; do_add:boolean);
var wp : word_ptr;
begin
with start_ptr^ do
begin
if ( s = word ) then
begin
pword := start_ptr;
end
else if ( s < word ) then
begin
if ( below = nil ) then
begin
if do_add then
begin
new(below);
add_node(below,s);
pword := below;
end
else
pword := NIL;
end
else
find_in_tree(s,below,pword,do_add);
end
else if ( s > word ) then
begin
if ( above = nil ) then
begin
if do_add then
begin
new(above);
add_node(above,s);
pword := above;
end
else
pword := NIL;
end
else
find_in_tree(s,above,pword,do_add);
end;
end;
end; (*** find_in_tree ***)
procedure print_sentence(sent:sentence_ptr);
var pp : word2_ptr;
i : integer;
begin
pp := sent^.words;
while ( pp <> nil ) do
begin
write_word(pp^.word^.word);
pp := pp^.next;
end;
writeln(sent^.cstop);
writeln(sent^.nwords:4,' words in sentence.');
writeln('Sentence type : ',sentence_name[sent^.phrase_type[0]]);
for i:= 1 to sent^.nclause do
writeln(' Subclause type : ',sentence_name[sent^.phrase_type[i]]);
end; (*** print_sentence ***)
procedure print_context(ptr:word_ptr);
var pp : word2_ptr;
sp : sentence_ptr;
begin
pp := ptr^.occurence;
while ( pp <> nil ) do
begin
print_sentence(pp^.sentence);
pp := pp^.occurence;
end;
end; (*** print_context ***)
procedure print_tree(ptr:word_ptr);
begin
if ( ptr <> NIL ) then
begin
print_tree(ptr^.below);
with ptr^ do
begin
write(word:27,' -c ',class_name[class]:9);
write( ' -s ',subclass_name[subclass]:9);
write( ' -m ',morph_name[morph]:9);
if ( ending <> nil ) then
begin
write( ' -e ',ending^.word:10);
if ( count_as <> nil ) then
begin
writeln;
write(word:27);
end;
end;
if ( count_as <> nil ) then
write( ' -r ',count_as^.word:16);
writeln(' [',ptr^.n_occur:3,']');
(**print_context(ptr);**)
end;
print_tree(ptr^.above);
end;
end; (*** print_tree ***)
procedure clear_tree(ptr:word_ptr);
begin
if ( ptr <> NIL ) then
begin
clear_tree(ptr^.below);
with ptr^ do
n_occur := 0;
clear_tree(ptr^.above);
end;
end; (*** clear_tree ***)
procedure count_tree(ptr:word_ptr; var nlex,nonce:integer);
begin
if ( ptr <> NIL ) then
begin
count_tree(ptr^.below,nlex,nonce);
with ptr^ do
if ( n_occur > 0 ) then
begin
nlex := nlex + 1;
if ( n_occur = 1 ) then
nonce := nonce + 1;
end;
count_tree(ptr^.above,nlex,nonce);
end;
end; (*** count_tree ***)
procedure add_to_sentence(sent:sentence_ptr; lexword:word_ptr; newflag:integer);
var pp,pp2 : word2_ptr;
ww : word_ptr;
begin
with sent^ do
begin
nwords := nwords + 1;
if ( words = nil ) then
begin
(*new_word2(words);*)
new(words);
words^.next := nil;
pp := words;
end
else
begin
pp := words;
while ( pp^.next <> nil ) do
pp := pp^.next;
(*new_word2(pp^.next);*)
new(pp^.next);
pp := pp^.next;
pp^.next := nil;
end;
with pp^ do
begin
sentence := sent;
word := lexword;
if ( word^.count_as = nil ) then
ww := word
else
ww := word^.count_as;
ww^.n_occur := ww^.n_occur+1;
if ( ww^.occurence = nil ) then
ww^.occurence := pp
else
begin
pp2 := ww^.occurence;
while ( pp2^.occurence <> nil ) do
pp2 := pp2^.occurence;
pp2^.occurence := pp;
end;
if ( word^.ending <> nil ) then
begin
word^.ending^.n_occur := word^.ending^.n_occur+1;
if ( word^.ending^.occurence = nil ) then
word^.ending^.occurence := pp
else
begin
pp2 := word^.ending^.occurence;
while ( pp2^.occurence <> nil ) do
pp2 := pp2^.occurence;
pp2^.occurence := pp;
end;
end;
class := word^.class;
subclass := word^.subclass;
morph := word^.morph;
next := nil;
occurence := nil;
flag := newflag;
end;
end;
end; (*** add_to_sentence ***)
procedure analyze_phrase(nwords:integer;clist,slist,mlist:list_type;
var sub_type:shortlist_type;
sub_index:integer; var nclause:integer; var word_order:shortlist_type);
var nw : integer;
i,j,k : integer;
nsubj,nconj,nvf,nvi,nvp,nneg,nknown,nask,nspecial : integer;
verbpos,negpos,vip,vpp,nnp,nppos : integer;
boundpos : integer;
cll,sll,mll : list_type;
dum,dum2,dum3 : integer;
begin
sub_type[sub_index] := unknown;
word_order[sub_index] := 20;
nsubj := 0;
nconj := 0;
for i := nwords downto 1 do
if ( clist[i] = subj ) then
begin
nsubj := nsubj + 1;
boundpos := i;
end
else if (( clist[i] = conj ) or (( clist[i] = pron ) and ( slist[i] = rel ))) then
begin
nconj := nconj + 1;
boundpos := i;
end;
nclause := nsubj+nconj+1;
if (( nsubj = 0 ) and ( nconj = 0 )) then
begin
verbpos := 0;
negpos := 0;
nppos := 0;
nvf := 0;
nvi := 0;
nneg := 0;
nnp := 0;
for i := 1 to nwords do
if ( clist[i] = verb ) then
begin
if (( mlist[i] = inf ) or ( slist[i] = part )) then
begin
nvi := nvi + 1;
end
else
begin
writeln(morph_name[mlist[i]]);
nvf := nvf + 1;
verbpos := i;
end
end
else if ( slist[i] = neg ) then
begin
nneg := nneg + 1;
negpos := i;
end
else if (( clist[i] = noun ) or ( slist[i] = pers )) then
begin
nnp := nnp+1;
nppos := i;
end;
writeln('nvf,nvi,nneg,nnp = ',nvf,nvi,nneg,nnp);
hfilli(9,nvf);
if ( nvf = 1 ) then
begin
if ( nneg = 0 ) then
begin
if ( nvi = 0 ) then
sub_type[sub_index] := 1
else
begin
nvi := 0;
nvp := 0;
for i := 1 to nwords do
if ( clist[i] = verb ) then
if ( mlist[i] = inf ) then
begin
nvi := nvi + 1;
vip := i;
end
else if ( slist[i] = part ) then
begin
nvp := nvp + 1;
vpp := i;
end;
if ( nvi = 1 ) and ( nvp = 0 ) then
sub_type[sub_index] := 2
else if ( nvi = 0 ) and ( nvp = 1 ) then
sub_type[sub_index] := 3
else
sub_type[sub_index] := 10;
end
end
else
begin
if ( nvi = 0 ) then
if ( negpos < verbpos ) then
sub_type[sub_index] := 4
else
sub_type[sub_index] := 5
else
begin
nvi := 0;
nvp := 0;
for i := 1 to nwords do
if ( clist[i] = verb ) then
if ( mlist[i] = inf ) then
begin
nvi := nvi + 1;
vip := i;
end
else if ( slist[i] = part ) then
begin
nvp := nvp + 1;
vip := i;
end;
if ((( nvi = 1 ) and ( nvp = 0 ))
or (( nvi = 0 ) and ( nvp = 1 ))) then
if ( negpos < verbpos ) then
sub_type[sub_index] := 6
else if ( negpos < vip ) then
sub_type[sub_index] := 7
else
sub_type[sub_index] := 8
else
sub_type[sub_index] := 11;
end
end;
if ( nnp = 1 ) then
if ( nneg = 0 ) then
if ( nppos > verbpos ) then
word_order[sub_index] := vs
else
word_order[sub_index] := sv
else
if ( nppos > verbpos ) then
if ( negpos > nppos ) then
word_order[sub_index] := vsn
else if ( negpos > verbpos ) then
word_order[sub_index] := vns
else
word_order[sub_index] := nvs
else
if ( negpos < nppos ) then
word_order[sub_index] := nsv
else if ( negpos < verbpos ) then
word_order[sub_index] := snv
else
word_order[sub_index] := svn
else if ( nneg > 0 ) then
if ( negpos > verbpos ) then
word_order[sub_index] := vn
else
word_order[sub_index] := nv;
writeln('Word order = ',word_order[sub_index],'; nppos,verbpos,negpos = ',nppos,verbpos,negpos);
end
else
sub_type[sub_index] := 12;
end
else
begin
sub_type[sub_index] := 9;
(*if ( nsubj + nconj = 1 ) then*)
begin
k := 0;
for i := 1 to boundpos-1 do
begin
k := k+1;
cll[k] := clist[i];
sll[k] := slist[i];
mll[k] := mlist[i];
end;
analyze_phrase(k,cll,sll,mll,sub_type,sub_index,dum2,word_order);
k := 0;
for i := boundpos+1 to nwords do
begin
k := k+1;
cll[k] := clist[i];
sll[k] := slist[i];
mll[k] := mlist[i];
end;
analyze_phrase(k,cll,sll,mll,sub_type,sub_index+1,dum2,word_order);
end;
(***
else
sub_type[sub_index+1] := 15;
*****)
end;
end; (*** analyze_phrase ***)
procedure analyze_sentence(sent:sentence_ptr);
var pp : word2_ptr;
clist,slist,mlist : list_type;
nw : integer;
i,j,k : integer;
nsubj,nconj,nvf,nvi,nvp,nneg,nknown,nask,nspecial : integer;
verbpos,negpos,vip,vpp : integer;
begin
with sent^ do
begin
pp := words;
nw := 0;
nknown := 0;
nask := 0;
nspecial := 0;
while ( pp <> nil ) do
begin
nw := nw+1;
clist[nw] := pp^.class;
if ( clist[nw] <> unknown ) then
nknown := nknown + 1
else
writeln('Unknown word : ',pp^.word^.word);
if ( clist[nw] = special ) then
nspecial := nspecial + 1
else if ( clist[nw] = ask ) then
nask := nask + 1;
slist[nw] := pp^.subclass;
mlist[nw] := pp^.morph;
pp := pp^.next;
end;
if ( nw <> nwords ) then
writeln('Funny! nw <> nwords in analyze_sentence; ',nw,nwords);
if ( nknown > 0 ) then (** Only analyze if some words known **)
begin
if ( nknown-nask-nspecial < nwords ) then
begin
pp := words;
while ( pp <> nil ) do
begin
if ( pp^.class = special ) then
special_word(pp);
if (( pp^.class = unknown ) or ( pp^.class = ask )) then
ask_word(pp);
pp := pp^.next;
end;
end;
for i := 1 to nwords do
hfilli(3,clist[i]);
for i := 0 to 15 do
phrase_type[i] := 19;
analyze_phrase(nwords,clist,slist,mlist,phrase_type,0,nclause,word_order);
for i := 0 to 15 do
if ( phrase_type[i] <> 19 ) then
nclause := i;
end;
hfilli(4,phrase_type[0]);
for i := 1 to nclause do
hfilli(5,phrase_type[i]);
hfilli(6,nclause+1);
for i := 0 to nclause do
hfilli(8,word_order[i]);
end;
print_sentence(sent);
end; (*** analyze_sentence ***)
procedure read_lexicon;
var i,j,k : integer;
c,option: char;
index : integer;
pword,pw2 : word_ptr;
line : string;
begin
open(infile,'*Filename:','OLD',status);
reset(infile);
while not eof(infile) do
begin
readln(infile,line);
jline := 1;
s := blank;
extract_word(line,jline,s);
clean_word(s,wleng,flag,c);
if ( wleng > 0 ) then
begin
find_in_tree(s,word_root,pword,true);
option := ' ';
repeat
extract_word(line,jline,s);
if ( s[1] = '-' ) then
option := s[2]
else
begin
clean_word(s,wleng,flag,c);
k := -1;
for i := 1 to max_class do
if ( option_name[i] = option ) then
k := i;
case k of
o_class : begin
find_in_list(s,wleng,class_name,index);
if ( index > 0 ) then
pword^.class := index
else
pword^.class := unknown;
end;
o_subclass: begin
find_in_list(s,wleng,subclass_name,index);
if ( index > 0 ) then
pword^.subclass := index
else
pword^.subclass := unknown;
end;
o_morph : begin
find_in_list(s,wleng,morph_name,index);
if ( index > 0 ) then
pword^.morph := index
else
pword^.morph := unknown;
end;
o_root : begin
find_in_tree(s,word_root,pw2,true);
pword^.count_as := pw2;
end;
o_ending: begin
find_in_tree(s,endroot,pw2,true);
pword^.ending := pw2;
end;
end;
option := '0';
end;
until ( jline < 0 );
end
else
writeln('ERROR line |',line,'|');
end;
close(infile);
end; (*** read_lexicon ***)
procedure write_corpus;
var sent : sentence_ptr;
begin
sent := sent_root;
while ( sent <> nil ) do
begin
print_sentence(sent);
sent := sent^.next;
end;
end; (*** write_corpus ***)
procedure write_lexicon(root:word_ptr);
begin
print_tree(root^.below);
print_tree(root^.above);
end; (*** write_lexicon ***)
procedure clear_words(root:word_ptr);
begin
clear_tree(root^.below);
clear_tree(root^.above);
end; (*** clear_words ***)
procedure read_corpus;
var c:char;
jline : integer;
s : string;
wleng : integer;
cp : sentence_ptr;
nonce,nlex : integer;
begin
open(infile,'*Filename:','OLD',status);
reset(infile);
clear_words(word_root);
hreset(0);
nwtot := 0;
nwsent := 0;
nsent := 0;
wlsum := 0;
while not eof(infile) do
begin
readln(infile,line);
jline := 1;
if ( line[1] <> '%' ) then
repeat
s := blank;
extract_word(line,jline,s);
clean_word(s,wleng,flag,c);
if ( wleng > 0 ) then
begin
nwtot := nwtot + 1;
nwsent := nwsent+1;
wlsum := wlsum + wleng;
hfilli(1,wleng);
find_in_tree(s,word_root,pword,true);
add_to_sentence(current_sent,pword,flag);
end;
if ( flag = 1 ) then (*** end of sentence ***)
begin
nsent := nsent+1;
nwsent := 0;
current_sent^.cstop := c;
analyze_sentence(current_sent);
hfilli(2,current_sent^.nwords);
new_sent(cp);
current_sent := cp;
with current_sent^ do
begin
words := nil;
nwords := 0;
nclause := 0;
phrase_type[0] := 19;
next := nil;
end;
end;
until ( jline < 0 );
end;
nlex := 0;
nonce := 0;
count_tree(word_root^.above,nlex,nonce);
count_tree(word_root^.below,nlex,nonce);
writeln('Total number of words = ',nwtot:8);
if ( nwtot > 0 ) then
writeln('Average wordlength = ',wlsum/nwtot:8:2);
writeln('Total number of sentences = ',nsent:8);
if ( nsent > 0 ) then
writeln('Average number of words per sentence = ',nwtot/nsent:8:2);
writeln('Different lexicon words = ',nlex:8);
writeln('Words used once only = ',nonce:8);
writeln;
hfilli(7,nwtot);
close(infile);
end; (*** read_corpus ***)
function wo_ok(sent:sentence_ptr;wo:integer): boolean;
begin
wo_ok := false;
if ( wo = v1 ) then
begin
if ( sent^.words <> nil ) then
if ( sent^.words^.class = verb ) then
wo_ok := true;
end
else if ( wo = v2 ) then
begin
if ( sent^.words <> nil ) then
if ( sent^.words^.next <> nil ) then
if ( sent^.words^.next^.class = verb ) then
wo_ok := true;
end
else
wo_ok := (sent^.word_order[0] = wo);
end;
procedure write_selection;
var sent : sentence_ptr;
pword : word2_ptr;
i,j,k : integer;
ok : boolean;
begin
if ( sel_phrase_type < 0 ) and ( nsel_words <= 0 ) and ( sel_sub_type <= 0 )
and ( sel_wo <= 0 ) and ( sel_subwo <= 0 ) then
writeln('No selection active.')
else
begin
if ( sel_phrase_type >= 0 ) then
writeln('Phrase structure selected : '
,sel_phrase_type:2,sentence_name[sel_phrase_type]);
if ( sel_sub_type >= 0 ) then
writeln('Subclause structure selected : '
,sel_sub_type:2,sentence_name[sel_sub_type]);
if ( sel_wo >= 0 ) then
writeln('Word order selected : ',wordorder_name[sel_wo]);
if ( sel_subwo >= 0 ) then
writeln('Subclause word order selected : ',wordorder_name[sel_subwo]);
if ( sel_initial ) then
writeln('Initial word selected : ',sel_words[1]^.word:20)
else
begin
if ( nsel_words > 0 ) then
begin
write('Words selected : ');
for i := 1 to nsel_words do
write(sel_words[i]^.word:15,' ');
writeln;
end;
if ( sel_ordered ) then
writeln('Order of words significant')
else
writeln('Order of words not significant');
end;
sent := sent_root;
while ( sent <> nil ) do
begin
if ( sel_phrase_type <= 0 )
or ( sent^.phrase_type[0] = sel_phrase_type ) then
begin
if ( sel_sub_type <= 0 ) then
ok := true
else
begin
ok := false;
for i := 1 to sent^.nclause do
if ( sent^.phrase_type[i] = sel_sub_type ) then
ok := true;
end;
if ( ok ) then
if ( sel_wo <= 0 )
or ( wo_ok(sent,sel_wo) ) then
begin
if ( sel_subwo <= 0 ) then
ok := true
else
begin
ok := false;
for i := 1 to sent^.nclause do
if ( sent^.word_order[i] = sel_subwo ) then
ok := true;
end;
if ( ok ) then
begin
if ( nsel_words = 0 ) then
print_sentence(sent)
else if ( sel_initial ) then
begin
pword := sent^.words;
if ( pword <> nil ) then
if ( pword^.word = sel_words[1] ) then
print_sentence(sent);
end
else
begin
k := 0;
if ( sel_ordered ) then
begin
i := 1;
pword := sent^.words;
while ( pword <> nil ) do
begin
if ( pword^.word = sel_words[i] ) then
begin
k := k+1;
i := i+1;
if ( i > 3 ) then
pword := nil;
end
else
pword := pword^.next;
end;
end
else
for i := 1 to nsel_words do
begin
pword := sent^.words;
while ( pword <> nil ) do
begin
if ( pword^.word = sel_words[i] ) then
begin
k := k+1;
pword := nil;
end
else
pword := pword^.next;
end;
end;
if ( k = nsel_words ) then
print_sentence(sent);
end;
end;
end;
end;
sent := sent^.next;
end;
end;
end; (*** write_selection ***)
procedure help(cindex:integer);
var i,j,k : integer;
printit : boolean;
line : string;
wleng,flag : integer;
c : char;
begin
open(cfile,'help','OLD',status);
reset(cfile);
if ( cindex < 0 ) then
printit := true
else
printit := false;
while not eof(cfile) do
begin
readln(cfile,line);
if ( line[1] = '!' ) then
begin
if ( cindex > 0 ) then
begin
clean_word(line,wleng,flag,c);
find_in_list(line,wleng,command_name,k);
if ( k = cindex ) then
printit := true
else
printit := false;
end
end
else if printit then
writeln(line);
end;
end;(*** help ****)
procedure clear_sentence(var p1:sentence_ptr);
var w1,w2,w3 : word2_ptr;
ww : word_ptr;
begin
end; (*** clear_sentence ***)
procedure clear_occurences(ww : word_ptr);
var w1,w2 : word2_ptr;
begin
if ( ww <> nil ) then
begin
w1 := ww^.occurence;
while ( w1 <> nil ) do
begin
w2 := w1^.occurence;
dispose(w1);
ww^.n_occur := ww^.n_occur-1;
w1 := w2;
end;
if ( ww^.n_occur <> 0 ) then
writeln('n_occur = ',ww^.n_occur,' for ',ww^.word);
ww^.occurence := nil;
if ( ww^.ending <> nil ) then
begin
ww^.ending^.n_occur := 0;
ww^.ending^.occurence := nil;
end;
clear_occurences(ww^.above);
clear_occurences(ww^.below);
end;
end; (*** clear_occurences ***)
procedure clear_corpus;
var i,j,k : integer;
p1,p2,p3 : sentence_ptr;
w1,w2,w3 : word2_ptr;
ww : word_ptr;
begin
end; (*** clear_corpus ***)
procedure do_command(clline:string);
var s,cclline :string;
jline,wleng,flag : integer;
c : char;
cindex,pindex : integer;
i,j,k : integer;
begin
jline := 1;
extract_word(clline,jline,s);
if ( jline > 0 ) then
begin
clean_word(s,wleng,flag,c);
find_in_list(s,wleng,command_name,cindex);
if ( cindex < 0 ) then
begin
if ( cindex = -2 ) then
begin
write('>> ');
write_word(s);
writeln('<< AMBIGUOUS.');
end
else
begin
write('>> ');
write_word(s);
writeln('<< NOT UNDERSTOOD.');
end
end
else
begin
extract_word(clline,jline,s);
clean_word(s,wleng,flag,c);
if ( wleng = 0 ) then
pindex := 0
else
find_in_list(s,wleng,p_name,pindex);
if (( pindex < 0 ) and ( cindex <> c_help )) then
begin
if ( pindex = -2 ) then
begin
write('>> ');
write_word(s);
writeln('<< AMBIGUOUS.');
end
else
begin
write('>> ');
write_word(s);
writeln('<< NOT UNDERSTOOD.');
end
end
else
begin
case cindex of
c_exit,c_quit : begin
writeln('QUIT');
quit := true;
end;
c_read : case pindex of
p_corpus : read_corpus;
p_lexicon : read_lexicon;
p_command : begin
open(cfile,'*Filename:','OLD',status);
reset(cfile);
while not eof(cfile) do
begin
readln(cfile,cclline);
writeln(cclline);
do_command(cclline);
end;
close(cfile);
end;
end;
c_write,c_show: case pindex of
p_corpus : write_corpus;
p_lexicon,p_word : write_lexicon(word_root);
p_ending : write_lexicon(endroot);
p_histogram : begin
hprint(0);
hptab(3,class_name);
hptab(4,sentence_name);
hptab(5,sentence_name);
hptab(8,wordorder_name);
end;
p_selection : write_selection;
p_command : begin
if ( icommand < max_commands ) then
for i := icommand+1 to max_commands do
writeln(cline[i]);
if ( icommand > 1 ) then
for i := 1 to icommand-1 do
writeln(cline[i]);
end;
end;
c_clear: case pindex of
p_histogram : hreset(0);
p_selection : begin
nsel_words := 0;
sel_phrase_type := -1;
end;
p_corpus : begin
(**clear_corpus;***)
writeln('This command has a bug; disabled.');
end;
p_lexicon,p_word : clear_words(word_root);
end;
c_help: begin
find_in_list(s,wleng,command_name,k);
help(k);
end;
c_select: case pindex of
p_word : begin
nsel_words := 0;
sel_ordered := false;
sel_initial := false;
for i := 1 to 10 do
if ( jline < 80 ) then
begin
extract_word(clline,jline,s);
writeln('|',s,'|');
if ( s[1] = '-' ) then
begin
if ( s[2] = 'o' ) then
begin
sel_ordered := true;
sel_initial := false;
end
else if ( s[2] = 'i' ) then
begin
sel_initial := true;
sel_ordered := false;
end
end
else if ( nsel_words+1 <= 3 ) then
begin
clean_word(s,wleng,flag,c);
writeln('|',s,'|');
if ( wleng > 0 ) then
begin
find_in_tree(s,word_root,sel_words[nsel_words+1],false);
writeln('|',s,'|');
if ( sel_words[nsel_words+1] <> NIL ) then
nsel_words := nsel_words+1
else
writeln(s,' not found in lexicon.');
end;
end;
end;
writeln(nsel_words:2,' words selected.');
end;
p_sentence,p_structure :
begin
sel_phrase_type := -1;
extract_word(clline,jline,s);
if ( wleng > 0 ) then
begin
sel_phrase_type := string_to_int(s);
if not ( sel_phrase_type in [1..20] ) then
begin
writeln('Selected structure must be between 1 and 20; not ',s);
sel_phrase_type := -1;
end
else
writeln('Phrase structure selected : '
,sel_phrase_type:2,',',sentence_name[sel_phrase_type]);
end;
end;
p_subclause :
begin
sel_sub_type := -1;
extract_word(clline,jline,s);
if ( wleng > 0 ) then
begin
sel_sub_type := string_to_int(s);
if not ( sel_sub_type in [1..20] ) then
begin
writeln('Selected structure must be between 1 and 20; not ',s);
sel_phrase_type := -1;
end
else
writeln('Subclause structure selected : '
,sel_sub_type:2,',',sentence_name[sel_sub_type]);
end;
end;
p_wordorder :
begin
sel_wo := -1;
extract_word(clline,jline,s);
clean_word(s,wleng,flag,c);
find_in_list(s,wleng,wordorder_name,cindex);
if ( cindex = -1 ) then
writeln('Word order ',s,' not recognized.')
else if ( cindex = -2 ) then
writeln('Word order ',s,' ambiguous.')
else
sel_wo := cindex;
end;
p_subwo :
begin
sel_subwo := -1;
extract_word(clline,jline,s);
clean_word(s,wleng,flag,c);
find_in_list(s,wleng,wordorder_name,cindex);
if ( cindex = -1 ) then
writeln('Word order ',s,' not recognized.')
else if ( cindex = -2 ) then
writeln('Word order ',s,' ambiguous.')
else
sel_subwo := cindex;
end;
end;
end;
end;
end;
end;
end; (*** do_command ***)
begin (*** main program ***)
init;
repeat
icommand := icommand+1;
if ( icommand > max_commands ) then
icommand := 1;
cline[icommand] := blank;
write('Command >> ');
readln(cline[icommand]);
do_command(cline[icommand]);
until quit;
end.