program ev(list,all,evp,output);
(* evolution of binding sites
Tom Schneider
permanent email: toms@alum.mit.edu
toms@alum.mit.edu
https://alum.mit.edu/www/toms/
module libraries required: delman, delmods, prgmods, matmods
*)
label 1; (* end of program *)
const
(* begin module version *)
version = 3.69; (* of ev.p 2001 June 6
3.69 [2001 Jun 6] upgrade documentation
3.68 [2001 Jun 6] SPECIAL RULE can be turned off by parameter
3.66 [2000 Jul 16] upgrade documentation
3.64 [2000 Jul 16] upgrade documentation
3.63 [2000 Jul 15] upgrade documentation
3.62 [2000 Jul 10] add more evp examples
3.61 [2000 Jul 9] upgrade documentation for publication release
3.57 [1999 Jul 24] clean up code a bit
3.55 [1999 Jul 22] Hg gets small sample correction.
3.42 [1999 Feb 17] sigma implemented
3.42 [1999 Feb 17] version number of program in parameter file
3.36 [1999 Feb 17] ability to only mutate the newly replicated half of the
population. This preserves good mutations. (variable mutatehalf)
This takes longer and is not a good idea!
1999 Feb 17: upgrade to more modern time modules, solve y2k problem
1998 March 7: upgrade to allow random placement of sites.
previous change: 1989 December 14
origin: 1984 april 17.
*)
updateversion = 3.68; (* defines lowest acceptable current parameter file *)
highversion = 15; (* defines highest acceptable current parameter file
This allows old files with (eg) 16 organisms to be
flagged as old. *)
(* end module version *)
(* begin module describe.ev *)
(*
name
ev: evolution of binding sites
synopsis
ev(list: out, all: inout, evp: inout, output: out)
files
list: a record of the evolutionary events that occurred. See the
description of evp for the kinds of data that can be printed. The data
may be graphed as desired using the xyplo program. The first few lines
of the file form an informative header. All of these lines begin with
an asterisk, '*', which acts as a comment. The data themselves are
organized into individual lines broken by spaces into a set of tokens.
These are described in the header.
all: All the variables, genome sequences and genetic structure to allow
continuation of the evolution. Start this as an empty file. Under
unix you can use:
touch all
After the ev program has been run, the all file will be filled with the
last generation. If you run ev again, it will use the current all file
to continue. To make a movie, step by 1 or more generations and use
evd to extract information from the all file. When you want to start
over, clear the file out. To do this under unix you can:
echo "" > all
evp: parameters to control the program, one per line:
Number of creatures (c, integer).
Number of potential binding sites per creature (G, integer).
Number of sites per creature (gamma, integer).
Width of the recognizer in bases (width, integer).
Bases per integer of the recognizer weights (bpi, integer).
Mutation rate in hits per creature per generation (mu, integer).
Seed: a real number between 0 and 1 used to start the random
number generator. The date and time is used if this
number is outside 0 to 1.
Cycles: number of additional generations to run (cycles, integer).
Display interval: for example, 10 means every 10th generation.
Display control: the first 7 characters on the line control
the kind of data printed to the list file:
a = display average number of mistakes and the standard
deviation for the population.
c = display changes in the number of mistakes.
The current Rsequence is given if r (below) is turned on.
This allows graphs of Rsequence vs mistakes to be made.
g = display genomic uncertainty, Hg. If this deviates much
from 2.0, then the simulation is probably bad.
i = display individuals' mistakes
o = display orbits: information of individual sites is shown
r = display information (Rsequence, bits)
s = current status (range of mistakes) is printed to the output
file.
m = current status (range of mistakes) is printed to the list
file.
These may be in any order. Any other characters (eg, blanks) are
ignored.
Selecting: boolean. If true (ie the first character is 't'), then the
organisms are sorted by their mistakes. If false, then the
organisms are randomly sorted. Normally this should be 'true', but
it does allow one to switch the selection off suddenly and watch
random drift (with no evolution) and the decay of existing patterns
by entropy increase. Selecting is true unless the first character
on the line is an 'f'.
StorageFrequency: The frequency (every so many generations) with which
to store a copy of everything in the all file. If the computer
crashes part way through a long run, then the run can be continued
from the last storage. Of course, a storage is always made at the
end of the evolution.
RandomSites: If the first character is 'r' then the gamma sites are
placed randomly. 'n' means that the sites are placed randomly,
but not overlapping. 'a' means that the sites are placed in a
regular array with constant spacing. In either case sites are
placed outside the recognizer gene, and there are exactly gamma
sites. That is, two randomly placed sites cannot be in the same
place. This avoids conflicts that would make interpretation of the
results difficult.
sigma: real. The standard deviation of the thermal noise. This noise
is added to the weight matrix during evaluation to simulate the
effect of heat changing a binding constant. Set to zero normally.
specialrule: char. SPECIAL RULE parameter. This parameter (introduced
[2001 June 6] determines what to do when two organisms tie for
survival. If the first character on the line is:
r - random survival of one organism
b - both survive (original method)
s - one survives depending on the sorting algorithm. This is an
arbitrary function of how the quicksort routine works and is
therefore generally not a good way to make the decision.
See below for more details.
Note: As of version 3.68 the program can automatically upgrade
the parameter file.
output: messages to the user, including warnings about conditions,
If the display control in evp (see above) includes 'o', then
the generation number and the range of mistakes are given.
If the display control includes 'a', then the mean and standard
deviation of the mistakes are also given.
description
A population of evolving creatures is simulated. Each creature consists
of a genome made of the 4 bases. All creatures have a certain number of
binding sites, and the recognizer for the sites is encoded by a gene in
each genome. The genomes are completely random at first. The recognizer
of each creature is translated from the gene form to a perceptron-like
weight matrix. This matrix is scanned across the genome. The number of
mistakes made is counted. There are two kinds of mistake:
how many times the recognizer misses a real site
and
how many times a non-site is detected by the recognizer.
These are weighted equally. (If they were weighted differently it should
affect the rate but not the final product of the simulation.) All
creatures are ranked by their number of mistakes. The half of the
population with the most mistakes dies; the other half reproduces to take
over the empty positions. Then all creatures are mutated and the process
repeats.
The integer weights of the recognizer are stored as base 4 numbers in twos
complement notation. a=00, c=01, g=10, t=11. If 'bases per integer' were
3, then aaa encodes 0, acg is 6, etc. txx and gxx (where x is any base)
are negative numbers; ttt is -1.
The threshold for recognition of a site is encoded in the genome just
after the individual weights. It is encoded by one integer.
************************************************************************
SPECIAL RULE: if the bugs have the same number of mistakes, reproduction
(by replacement) does not take place. This ensures that the quicksort
algorithm does not affect who takes over the population, and it also
preserves the diversity of the population. [1988 October 26] Without
this, the population quickly is taken over and evolution is extremely
slow!
[2001 June 6] In response to William Dembski's objection (see below for a
link) that this rule is inserting information into the results, a new
parameter is now available to turn off this rule.
What does this rule mean? It means that in a duel it is possible for two
bugs to have a tie. This is not an unreasonable result, and it frequently
happens in the natural world! What does removing the rule mean? It means
that the bugs will duel to the death, even if it is an arbitrary death!
Clearly this cannot affect the overall evolution, but it might slow things
down. Indeed it is interesting because it is often the case that in
fights the combatants are not killed.
************************************************************************
TO RUN THE PROGRAM, create an empty all file (see the description of the
all file for how to do this) and obtain a copy of an evp file (see the
"see also" section below for links to examples). Note that in parameter
files everything to the right of the actual parameter is ignored on each
line. Do not delete this material, it serves as a comment to remind you
what the parameter does! Then you can run the ev program. The results
are in the list and all files. To see the contents of the all file (which
will be a binary on your computer) use the evd program. You can use the
xyplo program to plot items from the list file.
HISTORICAL NOTE: After getting the idea that Rsequence is close to
Rfrequency from data on ribosome binding sites (on 1982 December 7), it
became clear that this had to have evolved. Eventually I recognized that
Rsequence must evolve toward a more or less set Rfrequency. (See the
paper Schneider.ev2000 for a discussion of this.) The challenge was to
test this hypothesis. My first thinking was that a simulation of the
evolution would take thousands of years on a computer. However, I read
the book "Ever Since Darwin, Reflections in Natural History" by Stephen
J. Gould and found in there an interesting chapter "Is the Cambrian
Explosion a Sigmoid Fraud?" (Gould1977.sigmoid) This described Gould and
Eldredge's idea of punctuated equilibrium. I realized that with
selection, if an organism has an advantage it can take over a population
exponentially. By 1984 I had written up my thesis (on the observation
that Rs ~ Rf, Schneider1986) and had a week before my defense in which I
had nothing more to do. So I wrote the ev program and it showed that Rs
does indeed evolve toward Rf. I don't recall that I even mentioned this
during the defense, but it was a nice backup!
documentation
For information calculations, see:
@article{Schneider1986,
author = "T. D. Schneider
and G. D. Stormo
and L. Gold
and A. Ehrenfeucht",
title = "Information content of binding sites on nucleotide sequences",
journal = "J. Mol. Biol.",
volume = "188",
pages = "415-431",
year = "1986"}
The inspirational article is:
@inproceedings{Gould1977.sigmoid,
author = "S. J. Gould",
title = " Is the Cambrian Explosion a Sigmoid Fraud?",
booktitle = "Ever Since Darwin, Reflections in Natural History",
publisher = "W. W. Norton \& Co.",
address = "N. Y.",
name ="Stephen Jay",
pages = "126-133",
year = "1977"}
Applying the Perceptron (a simple neural net) to binding sites:
@article{StormoPerceptron1982,
author = "G. D. Stormo
and T. D. Schneider
and L. Gold
and A. Ehrenfeucht",
title = "Use of the {`Perceptron'} algorithm to distinguish translational
initiation sites in {{\em E. coli}}",
journal = "Nucl. Acids Res.",
volume = "10",
pages = "2997-3011",
year = "1982"}
The evolution paper describing this program is:
@article{Schneider.ev2000,
author = "T. D. Schneider",
title = "Evolution of Biological Information",
journal = "Nucleic Acids Res",
volume = "28",
number = "14",
pages = "2794-2799",
year = "2000"}
examples
A lovely (but long) evolution can be had with the following evp:
*******************************************************************************
3.42 version of ev that this parameter file is designed for.
32 NUMBER OF CREATURES
1024 NUMBER OF BASES PER CREATURE, G
64 NUMBER OF SITES PER CREATURE, gamma
6 WIDTH OF THE RECOGNIZER IN BASES
5 BASES PER INTEGER OF THE RECOGNIZER
1 MUTATION RATE IN HITS PER CREATURE PER GENERATION
0.50 SEED FOR THE RANDOM NUMBER GENERATOR
40000 CYCLES
10 DISPLAY INTERVAL
cgrs5678 a=av, c=change, i=indivls, g=Hg, r=Rs, o=orbit, s=status, m=mistakes
true SELECTING
1000 STORAGE FREQUENCY
a r = random site placement, n = no overlap random, a = array
0 sigma
*******************************************************************************
The list file may be plotted with this parameter file for xyplo, the xyplop:
*******************************************************************************
3 6 zerox zeroy graph coordinate center
x 0 40000 zx min max (character, real, real) if zx='x' then set xaxis
y -1 6 zy min max (character, real, real) if zy='y' then set yaxis
10 28 1 1 xinterval yinterval xsubintervals ysubintervals: axis intervals
8 8 xwidth ywidth width of numbers in characters
0 2 xdecimal ydecimal number of decimal places
15 15 xsize ysize size of axes in cm
generation
Rsequence (bits) | Hg (bits) near 2 | mistakes/gamma are connected circles
a zc 'c' crosshairs, axXyYnN
n 2 zxl base if zxl='l' then make x axis log to the given base
n 2 zyl base if zyl='l' then make y axis log to the given base
*********************************************************************
1 3 xcolumn ycolumn columns of xyin that determine plot location
2 symbol column the xyin column to read symbols from
0 0 xscolumn yscolumn columns of xyin that determine the symbol size
0 0 0 hue saturation brightness columns for color manipulation
*********************************************************************
symbol to plot 'c'=circle, 'b','d'=box, 'x', '+', 'I', 'f', 'g'
r symbol flag character in xyin that indicates that this symbol
0.05 symbol sizex side in inches on the x axis of the symbol.
0.05 symbol sizey as for the x axis, get size from yscolumn
cl 0.05 connection (example for connection is c- 0.05 for dashed 0.05 inch)
n 0.05 linetype size linetype l.-in and size of dashes or dots
*********************************************************************
symbol to plot 'c'=circle, 'b','d'=box, 'x', '+', 'I', 'f', 'g'
g symbol flag character in xyin that indicates that this symbol
0.05 symbol sizex side in inches on the x axis of the symbol.
0.05 symbol sizey as for the x axis, get size from yscolumn
cl 0.05 connection (example for connection is c- 0.05 for dashed 0.05 inch)
n 0.05 linetype size linetype l.-in and size of dashes or dots
*********************************************************************
c symbol to plot 'c'=circle, 'b','d'=box, 'x', '+', 'I', 'f', 'g'
c symbol flag character in xyin that indicates that this symbol
0.05 symbol sizex side in inches on the x axis of the symbol.
0.05 symbol sizey as for the x axis, get size from yscolumn
cl 0.05 connection (example for connection is c- 0.05 for dashed 0.05 inch)
n 0.05 linetype size linetype l.-in and size of dashes or dots
*********************************************************************
.
**** version 8.50 of xyplop: uses cm for distances
. 0 6 0.10
. 0 5 0.10
- 0 4 0.10
. 0 3 0.10
. 0 2 0.10
. 0 1 0.10
l 0 0 1
*******************************************************************************
Note that dotted lines are placed across the graph for each bit, but a
dashed line is put for Rfrequence (= 4 bits). The vertical axis represents
the three kinds of data in the list file.
see also
{OTHER PROGRAMS}
{Program to display the current population:} evd.p
{Program to graph the list file:} xyplo.p
{EXAMPLE FILES}
{Example ev parameter file:} ev.example.evp
{Example xyplop parameter file given above:} ev.example.xyplop
{Parameter file for selective phase in the paper:} evp.selection
{Parameter file for atrophy phase in the paper:} evp.atrophy
{LINKS TO THE PAPER}
{The "Evolution of Biological Information" paper (with active hypertext
links in references):} https://alum.mit.edu/www/toms/paper/ev/
{The "Evolution of Biological Information" paper at Nucleic Acids
Research:} http://nar.oupjournals.org/cgi/content/full/28/14/2794
https://alum.mit.edu/www/toms/icons/donor.pure.gif {TO MAKE LOGOS:
Briefly, use the} evd.p {program to get the sequences (in the sites file)
followed by} makebk.p {(preferably in automatic mode) to create a Delila
book. Then use the delila system to create the logo:}
https://alum.mit.edu/www/toms/delila.html{. More details are given in
the} evd.p {program documentation.}
{William Dembski's objection:}
http://www.metanexus.net/archives/message_fs.asp?list=views&listtype=Magazine&action=sp_simple_archive_&page=1&ARCHIVEID=3294
{Page on William Dembski:}
http://www.fred.net/tds/anti/william.dembski/
{Page testing William Dembski's claim:}
https://alum.mit.edu/www/toms/paper/ev/dembski
author
Thomas Dana Schneider
bugs
none known (well, actually it's full of them... ;-)
technical notes
The program can use the date and time to create random numbers. These are
computer system calls and are not likely to function correctly on other
computers. Simply delete the calls to disable them if you are having
trouble compiling. You can always set different seeds to start the random
numbers by hand.
*)
(* end module describe.ev version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.const *)
namelength = 20; (* length of date-type alpha *)
maxgenomesize = 5000;(* was 1040 maximum genome size *)
(* 1048576 gives segmentation fault *)
maxgamma = 128; (* maximum number of sites. this is used as
the array bound on the locations of the sites *)
maxbugs = 128; (* maximum number of creatures *)
maxwidth = 20; (* maximum width of a recognizer *)
maxbpi = 10; (* one more than the maximum bases per integer.
this value determines how fine the fingers of the recognizer
are able to feel *)
extra = 5; (* the number of extra bases around each site
printed into file sequ. *)
col = 14; (* column width for the data display *)
dec = 5; (* decimal places for the data display *)
displayingtypes = 8; (* number of ways to display things in list *)
mutatehalf = false; (* if true, only newly replicated bugs are mutated,
thereby preserving good strains. If false, all bugs are mutated
every generation. *)
(* end module ev.const version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module datetime.const *)
datetimearraylength = 19; (* length of dataarray for dates,
It is just long enough to include the 4 digit year - solving the
year 2000 problem:
1980/06/09 18:49:11
123456789 123456789
1 2
*)
(* end module datetime.const version = 7.40; {of delmod.p 2000 Feb 18} *)
type
(* begin module date.type *)
(* this type is the same as the one in module book.type *)
alpha = packed array[1..namelength] of char;
(* end module date.type version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module datetime.type *)
(* array for dates *)
datetimearray = packed array[1..datetimearraylength] of char;
(* end module datetime.type version = 7.40; {of delmod.p 2000 Feb 18} *)
(* begin module ev.type *)
base = (a,c,g,t); (* the symbols in the genomes of the bugs *)
misc = record
(* various precalculated data *)
power: array[1..maxbpi, a..t] of integer; (* powers of 4.
power[x,a] = 0
power[x,c] = 4 ^ x.
power[x,g] = 2*power[x,c]
power[x,t] = 3*power[x,c]
this forms a lookup-table for quick translation *)
negative: integer; (* the smallest value in the range
0..4^bpi that represents a negative value in
two s complement notation *)
twonegative: integer; (* twice negative. this is the
actual amount to be subtracted *)
endscang: integer; (* g-1; the end of a scan of the genome *)
ehnb: real; (* the sampling bias factor for the number of
sites, gamma *)
varhnb: real; (* the variance of ehnb *)
rstdev: real; (* the total standard deviation of the sampling bias,
sqrt(varhnb*width) *)
hg: real; (* genomic information *)
eofnforhg: real; (* e(n) for Hg *)
ln2: real; (* the natural log of 2 for calculating things in bits *)
rfrequency: real; (* the information needed to locate sites *)
evversion : real; (* capture the version number of ev *)
end;
(* the genome of a creature is set up as a record to allow
indexing to one bug. this should be faster. *)
chromosome = packed array [1..maxgenomesize] of base;
genometype = record
genome: chromosome;
(* current composition of the genome: *)
composition: array[a..t] of integer;
end;
(* the mistakes each creature has made. first dimension is the
rank number. second dimension: 1. bug identification number
2. mistakes made. this allows sorting using quicksort. *)
position = 0..maxbugs; (* the position type for quicksort *)
rankthings = (bugid, mistakes);
ranktype = array[1..maxbugs,bugid..mistakes] of integer;
population = record
bugs: integer; (* number of creatures *)
genome: integer; (* number of potential binding sites *)
gamma: integer; (* number of sites *)
randomsites: char; (* r: random site placement with overlap
n: random site placement, no overlap
a: array placement *)
width: integer; (* width of sites in bases *)
bpi: integer; (* bases per integer in recognizer gene wmatrix *)
bpthreshold: integer; (* bases per threshold integer *)
genomesize: integer; (* the total length of the genome,
calculated as g+width-1+extra *)
site: array[1..maxgamma] of integer; (* locations
of the sites in the genomes. The first base of a site that
starts at base x is at base x+1. See function recognize. *)
creature: array[1..maxbugs] of genometype; (* all the bugs *)
rank: ranktype; (* the rankings of the bugs by their mistakes *)
(* this array records the locations of the sites to make
testing faster *)
sitelocations: packed array [1..maxgenomesize] of boolean;
halfofbugs: integer; (* half of the bugs, (bugs div 2) *)
firstmutated: integer; (* the first bug mutated. This is either
1 or halfofbugs+1 *)
end;
everything = record
precalc: misc; (* lots of precalculated stuff *)
datetimestamp: datetimearray; (* the date and time used
to identify the moment of creation on all output *)
mu: integer; (* mutation rate in hits per creature
per generation *)
seed: real; (* random number generator seed *)
initialseed: real; (* the seed read from the evp *)
p: population;
generation: integer; (* cycle number *)
cycles: integer; (* cycles to run *)
displayinterval: integer; (* every display-th generation is shown *)
(* NB: if more types are added to this list, increase the constant
displayingtypes in module ev.const *)
displayaverage : boolean; (* display average mistakes *)
displaychange : boolean; (* display change of mistakes *)
displaygenomich : boolean; (* display genomic uncertainty *)
displayindividual : boolean; (* display individual mistakes *)
displayorbits : boolean; (* display Rindividual orbits *)
displayrsequence : boolean; (* display Rsequence *)
displaystatus : boolean; (* display to output *)
displayonemistake : boolean; (* display the best individuals mistakes *)
previousmistakes: integer; (* number of mistakes made in previous
display (not in previous generation, it's only updated when
the display is activated according to displayinterval *)
selecting: boolean; (* If true, then the organisms are sorted
by their mistakes. If false, then the organisms are randomly
sorted. Normally this should be 'true'. *)
storagefrequency: integer; (* frequency (generations) with which to
store the everything record. *)
sigma: real; (* standard deviation of noise *)
noise: boolean; (* true if sigma > 0; used for speeding the code *)
haveone: boolean; (* have a gaussian value; used by the gaussian
procedure to switch between gaussianX and
gaussianY *)
gaussianX, gaussianY: real; (* gaussian values for the gaussian
procedure *)
specialrule: char; (* SPECIAL RULE for ties:
r - random survival of one organism
b - both survive (original method)
s - one survives depending on the sorting algorithm. This is an
arbitrary function of how the quicksort routine works and is
therefore generally not a good way to make the decision.
*)
end;
(* the allfile allows the program to be stopped and
restarted again: *)
allfile = file of everything;
wmatrix = record
matrix: array[a..t, 1..maxwidth] of integer; (* the w matrix *)
{
zzz consider switching to zero based method:
matrix: array[a..t, 0..maxwidth] of integer; (* the w matrix *)
}
threshold: integer; (* the lower threshold for function *)
end;
(* an array type for reals, such as f(B,L) and Ri(B,L) *)
blarray = array[a..t, 1..maxwidth] of real;
(* end module ev.type version = 2.50; (@ of ev 1988 oct 6 *)
var
(* begin module ev.var *)
list, evp: text; (* results, and parameter files *)
all: allfile;
e: everything; (* all the data. this must be global
to allow the quicksort to operate on the rank array.
other than that use, the data are passed. *)
(* end module ev.var version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module halt *)
procedure halt;
(* stop the program. the procedure performs a goto to the end of the
program. you must have a label:
label 1;
declared, and also the end of the program must have this label:
1: end.
examples are in the module libraries.
this is the only goto in the delila system. *)
begin
writeln(output,' program halt.');
goto 1
end;
(* end module halt version = 7.40; {of delmod.p 2000 Feb 18} *)
(* begin module copyaline *)
procedure copyaline(var fin, fout: text);
(* copy a line from file fin to file fout *)
begin (* copyaline *)
while not eoln(fin) do begin
fout^ := fin^;
put(fout);
get(fin)
end;
readln(fin);
writeln(fout);
end; (* copyaline *)
(* end module copyaline version = 7.56; {of delmod.p 2000 Nov 21} *)
(* begin module random *)
procedure random(var seed: real);
(* random generator 2. version = 1.00; 1986 December 31
Test this routine with the program tstrnd.
written by David Masternarde *)
(* This random number generator is based on a shift register with a single bit
of feedback, as described in Electronics for Neurobiologists, by Brown, Maxfield
and Moraff, MIT press 1973, referencing Random Process Simulation and
Measurement by Korn, McGraw-Hill 1966. The random seed rand, a number between 0
and 1 exclusive, is converted to an integer between 1 and 2**23-1, inclusive.
This 23-bit number is shifted right one bit and the output of the last (23rd)
bit and the 9th bit are added modulo 2 (exclusive orred) and fed back into the
new first bit. This is done between 4 and 11 times, depending on the last 3
bits of the original number. The result is converted back to a real number
between 0 and 1 from which the 23 bit integer can be recovered on the next call.
The 23-bit shift register goes through all 2**23-1 values before repeating;
the repetition frequency of this algorithm could be less or greater depending
on the seed, because of the random number of multiple shifts per call. *)
(* powers of 2 *)
const pow14=16384; pow15=32768; pow22= 4194304; pow23=8388608;
var iseed, (* integer shift register *)
i, nrep: integer; (* index, number of times to do shift *)
begin (* random *)
iseed := round(seed*pow23); (* convert to 23 bit number *)
if (iseed<1) or (iseed>=pow23) then iseed := 1;
nrep := 4 + iseed mod 8; (* do it 4 to 11 times based on last 3 bits *)
for i:= 1 to nrep do
(* if last bit and 9th bit are equal, feed back a 0, otherwise a 1 *)
if( odd(iseed) = ((iseed mod pow15) >= pow14))
then iseed := iseed div 2
else iseed := (iseed div 2) + pow22;
seed := iseed/pow23;
end; (* random *)
(* end module random version = 1.02; (@ of gauss.p 1994 sep 5 *)
(* begin module getdatetime *)
procedure getdatetime(var adatetime: datetimearray);
(* get the date and time into a single array from the system clock.
adatetime contains the date:
1980/06/09 18:49:11
ye mo da ho mi se
(year, month, day, hour, minute, second).
As of 2000 February 18, the Sun Pascal compiler requires a formatting
statement. This statement allows the date to be generated in this
standard Delila format in a single call. Information about the
formatting statement is available on the manual page for date in Unix.
If a computer does not have this method, see the 'oldgetdatetime' routine
in delmod.p (https://alum.mit.edu/www/toms/delila/delmod.html)
for some conversion code.
*)
begin
date(adatetime,'%Y/%m/%d %H:%M:%S');
end;
(* end module getdatetime version = 7.40; {of delmod.p 2000 Feb 18} *)
(* begin module writedatetime *)
procedure writedatetime(var thefile: text; adatetime: datetimearray);
(* expand the date and time out and print in the file *)
var
index: integer; (* index of datetime *)
begin
for index:=1 to datetimearraylength do write(thefile,adatetime[index])
end;
(* end module writedatetime version = 7.40; {of delmod.p 2000 Feb 18} *)
(* begin module timeseed *)
procedure addtoseed(var seed, power: real; c: char);
(* add the digit represented by c to the seed at the power position *)
var
n: integer; (* the character represented by c *)
begin (* addtoseed *)
power := power/10;
{
writeln(output,'addtoseed, c = ',c);
writeln(output,'addtoseed, ord(c) = ',ord(c));
}
case c of
' ': begin
writeln(output,'timeseed: error in datetime');
halt;
end;
'0': n := 0;
'1': n := 1;
'2': n := 2;
'3': n := 3;
'4': n := 4;
'5': n := 5;
'6': n := 6;
'7': n := 7;
'8': n := 8;
'9': n := 9
end;
(*writeln(output,'timeseed number is [',n:1,']'); (@ debug *)
seed := seed + power*n
end; (* addtoseed *)
procedure makeseed(adatetime: datetimearray; var seed: real);
(* convert adatetime to a real number in seed, reversed order *)
var
power: real; (* a digit of the seed such as 0.01 *)
begin
seed := 0.0;
power := 1.0;
addtoseed(seed, power, adatetime[19]);
addtoseed(seed, power, adatetime[18]);
(* : *)
addtoseed(seed, power, adatetime[16]);
addtoseed(seed, power, adatetime[15]);
(* : *)
addtoseed(seed, power, adatetime[13]);
addtoseed(seed, power, adatetime[12]);
(* *)
addtoseed(seed, power, adatetime[10]);
addtoseed(seed, power, adatetime[9]);
(* / *)
addtoseed(seed, power, adatetime[7]);
addtoseed(seed, power, adatetime[6]);
(* / *)
addtoseed(seed, power, adatetime[4]);
addtoseed(seed, power, adatetime[3]);
addtoseed(seed, power, adatetime[2]);
addtoseed(seed, power, adatetime[1]);
end;
procedure orderseed(adatetime: datetimearray; var seed: real);
(* convert adatetime to a real number in seed, normal order *)
var
power: real; (* a digit of the seed such as 0.01 *)
begin
seed := 0.0;
power := 1.0;
addtoseed(seed, power, adatetime[1]);
addtoseed(seed, power, adatetime[2]);
addtoseed(seed, power, adatetime[3]);
addtoseed(seed, power, adatetime[4]);
(* / *)
addtoseed(seed, power, adatetime[6]);
addtoseed(seed, power, adatetime[7]);
(* / *)
addtoseed(seed, power, adatetime[9]);
addtoseed(seed, power, adatetime[10]);
(* *)
addtoseed(seed, power, adatetime[12]);
addtoseed(seed, power, adatetime[13]);
(* : *)
addtoseed(seed, power, adatetime[15]);
addtoseed(seed, power, adatetime[16]);
(* : *)
addtoseed(seed, power, adatetime[18]);
addtoseed(seed, power, adatetime[19]);
end;
procedure timeseed(var seed: real);
(* read the computer date and time. reverse the order of the digits
and put a decimal point in front. this gives a fraction between
zero and one that varies quite quickly, and is always unique (if the
computer has sufficient accuracy). it is to be used as a seed to
a random number generator. *)
var
adatetime: datetimearray; (* a date and time *)
begin (* timeseed *)
getdatetime(adatetime);
{
writeln(output,'timeseed: adatetime: ',adatetime);
}
makeseed(adatetime, seed);
end; (* timeseed *)
(* end module timeseed version = 7.40; {of delmod.p 2000 Feb 18} *)
(* begin module basetochar *)
function basetochar(ba:base):char;
(* convert a base into a character *)
begin
case ba of
a: basetochar:='a';
c: basetochar:='c';
g: basetochar:='g';
t: basetochar:='t';
end
end;
(* end module basetochar version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module pic.polrec *)
procedure polrec(r,theta: real; var x,y: real);
(* convert polar to rectangular coordinates,
theta is in radians *)
begin
x := r*cos(theta);
y := r*sin(theta)
end;
(* end module pic.polrec version = 1.02; (@ of gauss.p 1994 sep 5 *)
(* begin module gaussian *)
procedure gaussian(var seed: real;
var haveone: boolean;
var x,y: real);
(* Produce a Gaussianly distributed number. The seed is for the random number
generator. Haveone is a Boolean which should be set to false the first time
the routine is called. The routine produces two Gaussian numbers, and returns
one in x, and the other in y. The next time the routine is called, the y value
is returned in x (the y value equals the x value). This allows one to keep
calling the routine and using the x value.
*)
const
twopi = 6.283185307179586476925; (* twice pi *)
var
radius: real; (* radius *)
angle: real; (* angle *)
begin
if haveone
then x := y
else begin
random(seed);
angle := seed * twopi;
random(seed);
radius := sqrt(-2.0*ln(seed));
polrec(radius,angle,x,y);
end;
haveone := not haveone
end;
(* end module gaussian version = 1.02; (@ of gauss.p 1994 sep 5 *)
(* ************************************************************************ *)
(* begin module calehnb *)
procedure calehnb(n: integer;
gna,gnc,gng,gnt: integer;
var hg, ehnb, varhnb: real
(* ; debugging: boolean *));
(* calculate e(hnb) in bits/bp (ehnb) for a number (n) of example sequence
sites. gna to gnt are the composition to use for the genome probabilities
of a to t. the genomic entropy hg and the variance var(hnb)
(=varhnb) are also calculated.
if the variable debugging is passed to the procedure then the individual
combinations of hnb are displayed.
note: this procedure should not be broken into smaller
procedures so that it remains efficient.
version = 3.02; of procedure calehnb 1983 nov 23 *)
const
maxsize = 200; (* the largest n allowed *)
accuracy = 10000; (* less than (1/accuracy) bits error is demanded
for the sum of pnb (see variable 'total') at the end of the procedure *)
var
log2: real; (* natural log of 2, used to find log base 2 *)
logn: real; (* log of n *)
nlog2: real; (* n * log2 *)
gn: integer; (* sum of gna..gnt *)
logpa,logpc,logpg,logpt: real; (* logs of genome probabilities *)
(* log of n factorial is the sum of i=1 to n of log(i).
the array below represents these logs up to n *)
logfact: array[0..maxsize] of real;
(* precalculated values of -p*log2(p), where p=nb/n for
nb = 0 .. n. m stands for minus *)
mplog2p: array[0..maxsize] of real;
i: integer; (* index for logfact and mplog2p *)
logi: real; (* natural log of i *)
na, nc, ng, nt: integer; (* numbers of bases in a site *)
done: boolean; (* true when the loop is completed *)
pnb: real; (* multinomial probability of a combination
of na, nc, ng, nt *)
hnb: real; (* entropy for a combination of na..nt *)
pnbhnb: real; (* pnb*hnb, an intermediate result *)
sshnb: real; (* sum of squares of hnb *)
(* variables for testing program correctness: *)
total: real; (* sum of pnb over all combinations of na..nt
if this is not 1.00, the program is in error *)
counter: integer; (* counts the number of times through
the loop *)
begin (* calehnb *)
(* prevent access to outside the arrays: *)
if n > maxsize then begin
writeln(output,' procedure calehnb: n > maxsize (',
n:1,'>',maxsize:1,')');
halt
end;
counter := 0;
total := 0.0;
log2 := ln(2);
logn := ln(n);
nlog2 := n * log2;
(* get logs of genome probabilities *)
gn := gna + gnc + gng + gnt;
logpa := ln(gna/gn);
logpc := ln(gnc/gn);
logpg := ln(gng/gn);
logpt := ln(gnt/gn);
(* find genomic entropy *)
hg := -(gna*logpa + gnc*logpc + gng*logpg + gnt*logpt)/(gn*log2);
ehnb := 0.0; (* start error entropy at zero *)
sshnb := 0.0;
(* make table of log of n factorial up to n
and entropies for nb/n *)
logfact[0] := 0.0; (* factorial(0) = 0 *)
mplog2p[0] := 0.0;
for i := 1 to n do begin
logi := ln(i);
logfact[i] := logfact[i - 1] + logi;
mplog2p[i] := - i * (logi - logn) / nlog2
end;
(* begin by looking at the combination with all a: na = n *)
na := n;
nc := 0;
ng := 0;
nt := 0;
(* the following loop simulates a number of nested loops
of the form:
for b1=a to t do
for b2=b1 to t do
for b3=b2 to t do
...
for bn=b(n-1) to t do ...
the resulting set of variables increase in alphabetic order
since no inner loop variable can have a value less than any
outer loop. the number of times through the inner-most loop
is given by:
o = (n + 1)*(n + 2)*(n + 3)/6
in the case where there are four symbols (a,c,g,t) and n is
the number of nested loops.
a recursive set of loops would be possible, but it
would use up too much memory in practical cases (up to n=150
or higher). a second algorithm sequests the loop variables
into an array and increments them there. however, the goal
is to get all possible combinations for na, nc, ng, nt, where
the sum of these is n. the nested loops provide all the
combinations in alphabetic order, assuring that there can not
be any duplicates. to find nb (one of na..nt) one would look
at which of the variables b1 to bn were of value b. this is
a wasteful operation.
the loop below simulates the array of control variables
by changing each nb directly.
*)
done := false;
repeat
(* pnb is calculated by taking the log of the expression
fact(n) na nc ng nt
pnb = ------------------- pa * pc * pg * pt .
fact(na).. fact(nt)
log(pnb) generates a series of sums, allowing
the calculation to proceed by addition and
multiplication rather than multiplication and
exponentiation. the factorials become tractable
in this way *)
pnb := exp(logfact[n] (* n factorial *)
- logfact[na]
- logfact[nc]
- logfact[ng]
- logfact[nt]
+ na * logpa
+ nc * logpc
+ ng * logpg
+ nt * logpt);
hnb := mplog2p[na]
+ mplog2p[nc]
+ mplog2p[ng]
+ mplog2p[nt];
pnbhnb := pnb * hnb;
ehnb := ehnb + pnbhnb;
sshnb := sshnb + pnbhnb * hnb; (* sum of squares of hnb *)
(* the following section keeps track of the calculation
and writes out the current set of nb. *)
counter := succ(counter);
(* if debugging then begin
write(output,' ',counter:2,' ');
for i := 1 to na do write(output,'a');
for i := 1 to nc do write(output,'c');
for i := 1 to ng do write(output,'g');
for i := 1 to nt do write(output,'t');
write(output,' ',na:3,nc:3,ng:3,nt:3);
writeln(output,' pnb = ',pnb:10:5);
end; *)
total := total + pnb;
(* the remaining portion of this repeat loop generates
the values of na, nc, ng and nt. notice that
there are 7 possibilities at each loop increment.
other than the stop, in each case the sum of
na+nc+ng+nt remains constant (=n). *)
if nt > 0
then begin (* ending on a t - do outer loops *)
if ng > 0
then begin (* turn g into t *)
ng := ng - 1;
nt := nt + 1
end
else if nc > 0
then begin (* turn one c into g,
and all t to g (note ng = 0 initially) *)
nc := nc - 1;
ng := nt + 1;
nt := 0
end
else if na > 0
then begin (* turn one a into c and
all g and t to c. (note ng=nc=0 initially) *)
na := na - 1;
nc := nt + 1;
nt := 0
end
else done := true (* since nt = n *)
end
else begin (* no t - increment innermost loop *)
if ng > 0
then begin (* turn g into t *)
ng := ng - 1;
nt := nt + 1
end
else if nc > 0
then begin (* turn c into g *)
nc := nc - 1;
ng := ng + 1
end
else begin (* na > 0; turn a into c *)
na := na - 1;
nc := nc + 1
end
end
until done;
(* final adjustment: we only have the sum of squares so far *)
varhnb := sshnb - ehnb*ehnb;
(* if this message appears, there is either a bug in the code or
the computer cannot be as accurate as requested *)
if accuracy <> round(accuracy * total) then begin
writeln(output,' procedure calehnb: the sum of probabilities is');
writeln(output,' not accurate to one part in ',accuracy:1);
writeln(output,' the sum of the probabilities is ',total:10:8);
end;
(* if this message appear, then there is an error in the
repeat-until loop: it did not repeat as many times as
is expected from the algorithm *)
if counter <> round((n+1)*(n+2)*(n+3)/6) then begin
writeln(output,' procedure calehnb: program error, the number of');
writeln(output,' calculations is in error');
halt
end;
(* writeln(output, ' total: ',total:10:5);
writeln(output,' count = ',counter:1);
writeln(output,' (n+1)*(n+2)*(n+3)/6 = ',
round((n+1)*(n+2)*(n+3)/6):1); *)
end; (* calehnb *)
(* end module calehnb version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module calaehnb *)
procedure calaehnb(n: integer;
gna,gnc,gng,gnt: integer;
var hg, aehnb, avarhnb: real);
(* calculate ae(hnb) in bits/bp (=aehnb) for a number (n) of example sequence
sites. gna to gnt are the composition to use for the genome probabilities
of a to t. the genomic entropy (=hg) and the variance avar(hnb) (=avarhnb) are
also calculated.
this procedure gives approximations for e(hnb) and var(hnb).
it is based on a formula derived by jeff haemer.
see also:
g. p. basharin
theory probability appl. 4(3): 333-336 (1959)
'on a statistical estimate for the entropy of a sequence
of independent random variables'
version = 3.01; of procedure calaehnb 1983 nov 23 *)
var
log2: real; (* natural log of 2 *)
gn: integer; (* sum of gna..gnt *)
pa, pc, pg, pt: real; (* genomic probabilities *)
e: real; (* the approximate sampling error *)
begin (* calaehnb *)
log2 := ln(2);
gn := gna + gnc + gng + gnt;
pa := gna/gn;
pc := gnc/gn;
pg := gng/gn;
pt := gnt/gn;
hg := -( (pa*ln(pa))
+(pc*ln(pc))
+(pg*ln(pg))
+(pt*ln(pt)))/log2;
e := 3/(2*log2*n);
aehnb := hg - e;
avarhnb := e * e
end; (* calaehnb *)
(* end module calaehnb version = 2.50; (@ of ev 1988 oct 6 *)
(* ************************************************************************ *)
(* begin module ev.calcerror *)
procedure calcerror(num, gna,gnc,gng,gnt, width: integer;
var ehnb, varhnb, rstdev, hg: real);
(* for num sequences, with a genomic composition (gna,gnc,gng,gnt),
and a width of sites of 'width',
calculate the sampling bias, ehnb (bits) and its variance varhnb.
Then calculate the total standard deviation for the site (rstdev),
based on the width of the sites.
hg is the genomic information *)
const
kickover = 50; (* e(hnb) for n values above this are estimated from
ae(hnb) = hg - 3/(2 ln(2) n) while those below
are calculated exactly *)
begin
if num <= kickover
then begin (* not using approximation *)
calehnb (num, gna,gnc,gng,gnt, hg, ehnb,varhnb);
end
else begin (* using approximation *)
calaehnb(num, gna,gnc,gng,gnt, hg,ehnb,varhnb);
end;
rstdev := sqrt(varhnb*width);
end;
(* end module ev.calcerror version = 2.50; (@ of ev 1988 oct 6 *)
procedure computeeofnforhg(var e: everything);
(* compute e(n) for Hg from the given genome size. NOTE that
the genome size variable (ie the input) is genome, but the actual
number of bases covered by the scan is another width of the
recognizer, so the n = genome + width *)
var
ehnb, varhnb, rstdev, dummyhg: real; (* things needed by calcerror *)
begin
(* compute small sampling correction for hg *)
calcerror(e.p.genome+e.p.width, 100,100,100,100, 1,
ehnb, varhnb, rstdev, dummyhg);
e.precalc.eofnforhg := 2-ehnb;
{
writeln(output,'genomesize= ',e.p.genomesize:5);
writeln(output,'genome= ',e.p.genome:5);
writeln(output,'width= ',e.p.width:5);
writeln(output,'new eofnforhg = ',e.precalc.eofnforhg:8:5);
}
end;
(* begin module ev.makeprecalc *)
procedure makeprecalc(var e: everything);
(* precalculate some frequently used values *)
var
place: integer; (* index for calculating precalc.power *)
begin
with e.p do begin
(* calculate the number of bases needed to store the threshold integer
so that the threshold integer never can 'bottom out'. That is,
the range of the threshold integer will exceed the range that
the matrix can supply. The 100's make the truncating go UP instead
of down. If the width is an exact power of 4, this won't put in
an extra base, as bpi+trunc(1 + (ln(width)/ln(4))) would do.
This would give us:
bpthreshold := bpi + 100- trunc(100- (ln(width)/ln(4)) );
Unfortunately, experience [1988Oct17] shows that with such
a large range, the model can fail to evolve. This is because
the threshold is compared to the SUM of the weights, which
therefore form a Gaussian distribution. This distribution can
be tight, so that it becomes unlikely that the any of the sites
can cross the threshold to allow selection to take place.
therefore the range of bpthreshold should be kept small enough
to 'oil' the evolution, but not too large:
WARNING: if any other value of bpthreshold is used than bpi,
then the variables 'negative' and 'twonegative' must be split,
so they represent either bpi or bpthreshold,
and the appropriate pair must used in calling function decode. *)
bpthreshold := bpi
end;
(*
writeln(output,'makeprecalc: bpthreshold = ',e.p.bpthreshold:1);
*)
with e.precalc do begin
(* determine the powers of 4 so that translation of the
w matrix is faster. Note that this must be done to bpthreshold,
rather than only bpi. *)
power[1,a] := 0;
power[1,c] := 1;
power[1,g] := 2;
power[1,t] := 3;
for place := 2 to e.p.bpthreshold do begin
power[place,a] := 0; (* 0 times *)
power[place,c] := 4*power[place-1,c]; (* 1 times *)
power[place,g] := 2*power[place,c]; (* 2 times *)
power[place,t] := 3*power[place,c]; (* 3 times *)
end;
negative := round( exp(e.p.bpthreshold*ln(4)) / 2);
twonegative := 2*negative;
(* calculate the end of the scan of the genome *)
with e.p do endscang := genome - 1;
(* calculate the sampling error information, based on the number of
sites of each creature, use equiprobable genome for this *)
calcerror(e.p.gamma, 100,100,100,100, e.p.width,
ehnb, varhnb, rstdev, hg);
(* calculate the sampling error information for the size of the genome *)
computeeofnforhg(e);
(* precalculate the natural log of 2, so conversion to bits is easy *)
ln2 := ln(2.0);
(* precalculate Rfrequence for making warnings *)
rfrequency := -ln(e.p.gamma/e.p.genome)/ln2;
evversion := version; (* capture the version number of ev *)
end;
(* a test on how well the evolution will run *)
with e do with p, precalc
do if rfrequency > 2*width then begin
writeln(output,'WARNING: requested site density exceeds 1.0');
writeln(output,' width = ', width:5,' bits per site');
writeln(output,' rfrequency = ',rfrequency:10:5,' bits per site');
writeln(output,' maximum site information = 2 x width = ',
(2*width):5,' bits/site');
writeln(output,' maximum site density = rfrequency/(2*width) = ',
(rfrequency/(2*width)):5:3,' bits/site');
writeln(output);
end;
with e.p, e.precalc do begin
(* check that the programmer does not try to circumvent the warnings. *)
if bpi <> bpthreshold then begin
writeln(output,
'procedure makeprecalc: programer error. see warnings.');
halt
end
end
end;
(* end module ev.makeprecalc version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.puteverything *)
procedure puteverything(e: everything; var all: allfile);
(* put everything in the all file *)
begin
rewrite(all);
all^ := e;
put(all);
end;
(* end module ev.puteverything *)
(* begin module ev.geteverything *)
procedure geteverything(var e: everything; var all: allfile);
(* get everything in the all file. Note that this does not do a reset *)
begin
e := all^;
get(all);
end;
(* end module ev.geteverything *)
(* begin module ev.getparams *)
procedure getparams(var all: allfile;
var evp: text;
var e: everything);
(* get the parameters from all and evp, store them in the everything record.
precalculate the powers of 4. *)
var
c: char; (* a character read from evp *)
dohalt: boolean; (* do a halt after giving error messages to the user *)
i: integer; (* dummy integer for reading evp when
file all is not empty *)
parameterversion: real; (* parameter version number *)
r: real; (* like i but a real *)
procedure message(msg: integer);
(* produce a message about the relationship between all and evp *)
begin (* message *)
writeln(output,' ---------------------------------------------');
writeln(output,' YOU ALTERED SOMETHING IN evp',
' THAT YOU SHOULD NOT HAVE: ');
write(output,' ');
case msg of
1: write(output,'number of bugs');
2: write(output,'G (genome size)');
3: write(output,'gamma increased');
4: write(output,'width');
5: write(output,'bases/integer');
end;
writeln(output);
writeln(output);
writeln(output,' Only the following evp parameters are allowed');
writeln(output,' to be different from those in the all file:');
writeln(output);
writeln(output,' mutation rate');
writeln(output,' number of cycles');
writeln(output,' seed');
writeln(output,' number of sites (allowed to decrease)');
writeln(output,' display interval');
writeln(output,' display controls');
writeln(output,' selection ');
writeln(output,' storage frequency');
writeln(output,' specialrule');
writeln(output);
writeln(output,' Use program evd to determine the current values.');
halt
end; (* message *)
procedure getdisplays;
(* get the display switch values *)
var
c: char; (* a character read from evp *)
l: integer; (* loop variable for reading display controls *)
begin (* getdisplays *)
e.displayaverage := false;
e.displaychange := false;
e.displaygenomich := false;
e.displayindividual := false;
e.displayorbits := false;
e.displayrsequence := false;
e.displaystatus := false;
e.previousmistakes := -1; (* impossible, so will force first display *)
if not eof(evp) then begin
for l := 1 to displayingtypes do begin
read(evp,c);
if c in ['a','c','g','i','o','r','s','m'] then case c of
'a': e.displayaverage := true;
'c': e.displaychange := true;
'g': e.displaygenomich := true;
'i': e.displayindividual := true;
'o': e.displayorbits := true;
'r': e.displayrsequence := true;
's': e.displaystatus := true;
'm': e.displayonemistake := true;
end
end;
readln(evp)
end
else begin
e.displayaverage := true;
e.displaychange := true;
e.displaygenomich := true;
e.displayindividual := true;
e.displayorbits := true;
e.displayrsequence := true;
e.displaystatus := true;
e.displayonemistake := true;
end
end; (* getdisplays *)
procedure getselect;
(* get whether or not to select by mistakes *)
var
c: char; (* a character read from evp *)
begin
e.selecting := true; (* normal circumstance *)
if not eof(evp) then begin
readln(evp,c);
if c = 'f' then begin (* do it for no other character than this one *)
e.selecting := false;
writeln(output,'WARNING: NO SELECTION')
end
end
end; (* getselect *)
procedure upgradefrom342tonow;
(* [2001 Jun 6]: introduce specialrule *)
var
hold: text; (* text file for upgrading evp *)
begin
writeln(output, 'FIXING THE PARAMETER file, evp!');
rewrite(hold);
reset(evp);
readln(evp); (* skip header line *)
while not eof(evp) do copyaline(evp, hold); (* grab it *)
rewrite(evp); (* here goes!! *)
writeln(evp,version:4:2,
' version of ev that this parameter file is designed for.');
reset(hold);
while not eof(hold) do copyaline(hold, evp); (* put it back *)
(* now add the new line: *)
writeln(evp,
'b SPECIAL RULE for ties: r: random, b: both, s: sort');
(* now prepare for proper reading *)
reset(evp);
readln(evp, parameterversion);
if parameterversion <> version then begin
writeln(output,'utter failure of upgrade attempt!');
halt
end
end;
begin (* getparams *)
reset(all);
reset(evp);
readln(evp, parameterversion);
if (parameterversion < updateversion) or
(parameterversion > highversion)
then begin
writeln(output, 'You have an old parameter file!');
{zzzvvv}
if parameterversion = 3.42
then upgradefrom342tonow
else halt
end;
{ppp}
if not eof(all) then begin
(* we use the all file to determine the
previous state of the population and the parameters *)
writeln(output,'Old Creation ------------------------');
writeln(output,' Continuation: using the all file for previous data');
geteverything(e,all);
(* now we make sure that the evp file agrees *)
if not eof(evp) then with e.p do begin
readln(evp,i);
if i <> bugs then message(1);
readln(evp,i);
if i <> genome then message(2);
readln(evp,i);
if i > gamma then message(3)
else if i < gamma then begin
writeln(list,'* the last ',(gamma-i):1,
' sites have been removed');
writeln(output,' the last ',(gamma-i):1,
' sites have been removed');
gamma := i;
(* make sure that we recalculate the sampling error! *)
makeprecalc(e)
end;
readln(evp,i);
if i <> width then message(4);
readln(evp,i);
if i <> bpi then message(5);
readln(evp,i);
with e do if i <> mu then begin
mu := i;
writeln(output,' new mutation rate: ', mu:1);
writeln(list,'* new mutation rate: ', mu:1)
end;
readln(evp,r);
with e do if r <> initialseed then begin
writeln(output,' NOTE: seed has been changed');
writeln(list,'* NOTE: seed has been changed');
initialseed := r;
seed := r;
end
else begin
writeln(output,' current seed will be used');
writeln(list,'* current seed will be used');
end;
(* allow changes to cycles and display interval and selection *)
readln(evp,e.cycles);
readln(evp,e.displayinterval);
getdisplays;
getselect;
readln(evp,e.storagefrequency);
(* allow for random site placement *)
readln(evp, c);
if randomsites <> c then begin
writeln(output,'You cannot change the site placement.');
halt;
end;
randomsites := c;
readln(evp,e.sigma);
end
else begin
writeln(output,' evp file is empty,',
' number of cycles will be zero');
e.cycles := 0
end
end
else if not eof(evp)
then with e.p do begin (* just read in the params for creation *)
writeln(output,'New Creation ------------------------');
e.generation := 0; (* this defines the start *)
readln(evp,bugs);
readln(evp,genome);
readln(evp,gamma);
if gamma = 0 then begin
writeln(output,'There must be some sites, gamma must not be zero');
halt;
end;
readln(evp,width);
readln(evp,bpi);
readln(evp,e.mu);
readln(evp,e.seed);
readln(evp,e.cycles);
readln(evp,e.displayinterval);
getdisplays;
getselect;
readln(evp,e.storagefrequency);
readln(evp, randomsites);
if not (randomsites in ['r','n','a']) then begin
writeln(output,'randomsites must be one of: r, n, a');
halt
end;
with e do begin
readln(evp,e.sigma);
if sigma = 0.0
then noise := false
else noise := true;
haveone := false;
gaussianX := -1.0; (* unlikely value flag *)
gaussianY := -1.0; (* unlikely value flag *)
end;
genomesize := genome + (width - 1) + extra;
if (e.seed <= 0.0) or (e.seed >= 1.0)
then timeseed(e.seed);
e.initialseed := e.seed; (* record it permanently *)
readln(evp, e.specialrule);
end
else begin
writeln(output,' both all and evp files are empty');
halt
end;
(* tests. They are put here so that they apply to both an
initial (creation) reading of the parameters, and to a later reading.
This insures that they are valid. *)
with e.p do begin
dohalt := false;
if bugs > maxbugs then begin
writeln(output,' The number of creatures may not',
' exceed ',maxbugs:1,'.');
dohalt := true
end;
if odd(bugs) then begin
writeln(output,' The number of creatures must',
' be even.');
dohalt := true
end;
halfofbugs := bugs div 2;
if genome <= 0 then begin
writeln(output,' There must be a positive genome size.');
dohalt := true
end;
(* another test of g is the test of genomesize, below *)
if gamma > genome then begin
writeln(output,' The number of sites may not',
' exceed genome size: ',genome:1,'.');
dohalt := true
end;
if gamma <= 0 then begin
writeln(output,' There must be a positive number of sites.');
dohalt := true
end;
if width > maxwidth then begin
writeln(output,' The width of sites may not ',
' exceed ',maxwidth:1,'.');
dohalt := true
end;
if genomesize > maxgenomesize then begin
writeln(output,' The genome size is f(g) = g + (width-1) + extra.',
' This can not exceed ',maxgenomesize:1);
writeln(output,' genome=',genome:1,
', width=',width:1,
', extra=',extra:1,
', f(genome)=', (genome + (width-1) + extra):1);
dohalt := true
end;
if bpi < 0 then begin
writeln(output,' Bases per integer must be positive.');
dohalt := true
end;
if bpi > maxbpi then begin
writeln(output,' Bases per integer must be',
' less than or equal to ',maxbpi:1,'.');
dohalt := true
end;
if e.mu < 0 then begin
writeln(output,' Mutation rate must be zero or positive.');
dohalt := true
end;
if e.cycles < 0 then begin
writeln(output,' Number of cycles must be zero or positive.');
dohalt := true
end;
if e.displayinterval < 1 then begin
writeln(output,' Display interval must be 1 or larger.');
dohalt := true
end;
if e.storagefrequency < 1 then begin
writeln(output,' Storage frequency must be 1 or larger.');
dohalt := true
end;
if e.sigma < 0 then begin
writeln(output,' sigma must be non-negative.');
dohalt := true
end;
(* SPECIAL RULE *)
if not (e.specialrule in ['r','b','s']) then begin
writeln(output,' specialrule must be in ''rbs''.');
dohalt := true
end;
end;
if dohalt then halt
end; (* getparams *)
(* end module ev.getparams version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.listparams *)
procedure listparams(var list: text; e: everything);
(* create the header of the file and list the parameters *)
procedure truthof(x: boolean);
(* print the truth value of x to file list*)
begin
if x then write(list,'* true ')
else write(list,'* false')
end;
begin
with e.p do begin
writeln(list,'*');
writeln(list,'* ',bugs:col,' number of creatures');
writeln(list,'* ',genome:col,
' number of potential binding sites, G');
writeln(list,'* ',gamma:col,
' number of sites per creature, gamma');
writeln(list,'* ',width:col,
' width of the recognizer in bases');
writeln(list,'* ',bpi:col,
' bases per integer of the recognizer');
writeln(list,'* ',e.mu:col,
' mutation rate in hits per creature per generation');
writeln(list,'* ',e.initialseed:col:12,
' initial seed for the random number generator (given in evp)');
writeln(list,'* ',e.cycles:col,' cycles');
writeln(list,'* ',e.displayinterval:col,' display interval');
truthof(e.displayaverage );
writeln(list,' : listing average mistakes');
truthof(e.displaychange );
writeln(list,' : listing changes of mistakes');
truthof(e.displayindividual);
writeln(list,' : listing individuals');
truthof(e.displaystatus);
writeln(list,' : listing status to output');
truthof(e.displayorbits);
writeln(list,' : listing Rindividual orbits');
truthof(e.displayrsequence );
writeln(list,' : listing Rsequence');
truthof(e.displaygenomich );
writeln(list,' : listing genomic uncertainty');
truthof(e.displayonemistake);
writeln(list,' : listing mistakes of best individual');
writeln(list,'* ',e.seed:col:12,
' current seed for the random number generator');
truthof(e.selecting);
writeln(list,' : selecting by mistakes');
writeln(list,'* ',e.storagefrequency:col,' storage frequency');
writeln(list,'* ',e.sigma:col:dec,' sigma, standard deviation of noise');
if e.noise
then writeln(list,'* ',' ':col,' Noise is modeled')
else writeln(list,'* ',' ':col,' No noise is modeled');
writeln(list,'* ',e.specialrule,
' SPECIAL RULE for ties: r: random, b: both, s: sort');
end
end;
(* end module ev.listparams version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.makebase *)
procedure makebase(var seed: real; sa,sac,sacg: real;
var b: base);
(* use the random number seed to generate a base. the parameters
sa, sac and sacg define the frequency of the bases:
sa is the frequency of a,
sac-sa is the frequency of c,
sacg-sac is the frequency of g,
1-sagc is the frequency of t.
no check is made to see that these dividers are correct.
the equiprobable situation would be sa=.25, sac=.5, sacg=.75 *)
begin
random(seed);
if seed <= sa then b := a
else if seed <= sac then b := c
else if seed <= sacg then b := g
else b := t
end;
(* end module ev.makebase version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.creation *)
procedure creation(var list: text; var e: everything);
(* set up the genomes if this is the beginning of an experiment *)
var
done: boolean; (* done looking for an empty place to put the site *)
potential: integer; (* the potential number of places a
site could be located outside the recognizer gene *)
jump: integer; (* the distance between sites *)
ga: integer; (* index to the gamma sites *)
bug, x: integer; (* index to the creature and the genomic location *)
b: base; (* the base made *)
overlapping: boolean; (* true if sites overlap *)
tryspot: integer; (* to see if there is a site at this spot already *)
tries: integer; (* number of times we tried to place a site *)
randomfill: boolean; (* randomly filling site locations. If this fails
to find a site looking twice through the genome, filling
starts from the bottom. *)
(* This array marks the regions that are filled by sites so that
overlapping sites can be avoided if the user wants, using the
randomsites = 'n' option. *)
sitefills: packed array [1..maxgenomesize] of boolean;
s: integer; (* index to sitefills and sitelocations *)
procedure fillerup(tryspot: integer);
(* fill up the arrays at tryspot *)
var
s: integer; (* index to sitefills and sitelocations *)
begin
{
with e.p do begin
write(output,'fillerup: tryspot= ',tryspot:1);
writeln(output,' tryspot+width-1= ',tryspot+width-1:1);
end;
}
with e.p do begin
sitelocations[tryspot] := true;
for s := tryspot to tryspot + width - 1
do begin
if sitefills[s]
then overlapping := true
else sitefills[s] := true;
end;
{
for s := 1 to genome do
if sitefills[s]
then write(output,'*')
else write(output,'.');
writeln(output,'tryspot = ',tryspot:1);
}
end;
end;
begin
overlapping := false;
writeln(list,'*');
if e.generation = 0 then with e.p do begin
getdatetime(e.datetimestamp);
writeln(list,'* creation');
(* clear the sitelocations and sitefills arrays *)
for s := 1 to maxgenomesize do sitelocations[s] := false;
for s := 1 to maxgenomesize do sitefills[s] := false;
(* note: bpthreshold was calculated in makeprecalc *)
(* determine the locations of the sites
4*width*bpi is the last position of the recognizer gene matrix and
(4*width*bpi)+bpthreshold is the last position of the threshold.
Adding one width to this assures that the first site
is not constrained by the recognizer gene.
note:
location of a site is the zero position of the site.
this is one base to the left of the first base recognized.
this avoids extra subtractions in the main search loops. *)
site[1] := 4*width*bpi + bpthreshold + width;
fillerup(site[1]);
potential := genome - site[1] + 1;
writeln(list,'* available places to put sites: ', potential:1);
if gamma > 1 then begin (* at gamma = 1 we already have assigned the
first site *)
if (randomsites = 'r') or
(randomsites = 'n') then begin
randomfill := true;
done := false;
tries := 0;
tryspot := site[1];
for ga := 2 to gamma do begin
if randomfill then begin
done := false;
tries := 0;
while (not done) and (tries < genome) do begin
random(e.seed);
tryspot := round(e.seed*potential) + site[1] + 1;
tries := succ(tries);
(* determine if ANY other site is at tryspot *)
case randomsites of
'r': if not sitelocations[tryspot]
then done := true;
'n': if (not sitefills[tryspot]) and
(not sitefills[tryspot + width - 1])
then done := true;
end;
end;
if not done then begin
{
writeln(output,'randomsites is ',randomsites);
writeln(output,'ga is ',ga:1);
}
writeln(output,'RANDOM PLACEMENT FAILED because',
' an empty spot could not be found');
writeln(output,'in G = ',tries:1, ' tries.');
writeln(output,'Filling sites in from the bottom.');
randomfill := false;
done := false;
tries := 0;
tryspot := site[1];
end;
end;
if not randomfill then begin (* regular filling *)
done := false;
tries := 0;
tryspot := site[1];
(* fill sites at first available place *)
while (not done) and (tries < genome) and
(tryspot < genome) do begin
tryspot := succ(tryspot);
tries := succ(tries);
{
writeln(output,'trying ',tryspot:1);
}
if (not sitefills[tryspot]) and
(not sitefills[tryspot + width - 1])
then begin
done := true
{
;writeln(output,tryspot:1,'is clear');
}
end;
end
end;
if not done then begin
writeln(output,'UNABLE TO FILL IN THIS MANY SITES');
halt;
end;
site[ga] := tryspot;
fillerup(tryspot);
{
writeln(output, 'randomly placed site ',ga:1,' is at ', site[ga]:1);
}
end;
end
else begin (* regular placement of sites spaced 'jump' apart *)
jump := trunc(potential/(gamma-1));
for ga := 2 to gamma do begin
site[ga] := site[ga-1] +jump;
fillerup(site[ga]);
{
writeln(output, 'array placed site ',ga:1,' is at ', site[ga]:1);
}
end
end
end;
(* tell them about it *)
writeln(list,'* zero of the first site is at position ',site[1]:1);
if gamma = 1
then writeln(list,'* one binding site')
else if (randomsites = 'a')
then writeln(list,'* distance between sites is ',jump:1,' bases')
else writeln(list,'* sites placed randomly');
if gamma >1 then
if (jump < width) or overlapping
then begin
writeln(list,'* ******** SITES OVERLAP *********');
writeln(output,'******** SITES OVERLAP *********');
end
else writeln(list,'*')
else writeln(list,'*');
writeln(list,'*');
(* fill the creatures with random stuff *)
for bug := 1 to bugs do with creature[bug] do begin
for b := a to t do composition[b] := 0;
for x := 1 to genomesize
do begin
(* writeln(list,'* bug=',bug:1,' x=',x:1); *)
makebase(e.seed,0.25,0.50,0.75,b);
(*writeln(list,'* b=',ord(b)); *)
genome[x] := b;
composition[b] := composition[b] + 1
(*writeln(list,'* endloop'); *)
end;
end;
end;
(* new method as of 1999 Feb 17 *)
with e.p do
if mutatehalf
then firstmutated := halfofbugs + 1
else firstmutated := 1;
{
writeln(output,'halfofbugs = ',e.p.halfofbugs:1);
halt;
}
writeln(output,'End Creation ------------------------');
end;
(* end module ev.creation version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.decode *)
function decode(var e: everything; bug: integer;
z: integer; basesperinteger: integer;
precalcnegative, precalctwonegative: integer): integer;
(* decode the genetic sequence into an integer. Starting at position
z, the ; basesperinteger bases are translated into a two's complement integer.
precalcnegative and precalctwonegative are precalculated values for
forming the two's complement. see procedure makeprecalc. *)
var
val: integer; (* intermediate values of the decoding *)
place: integer; (* place value of a value *)
x: integer; (* location on the genome *)
begin
(* convert one stretch of the genome into an
integer. the width of the stretch is basesperinteger. *)
val := 0;
x := z + basesperinteger;
with e.p.creature[bug] do begin
for place := 1 to basesperinteger
do val := val + e.precalc.power[place,genome[x-place]]
end;
(* convert to two's complement notation *)
if val >= precalcnegative
then val := val - precalctwonegative;
decode :=val
end;
(* end module ev.decode *)
(* begin module ev.translate *)
procedure translate(var e: everything; bug: integer;
z: integer; var w: wmatrix);
(* convert the gene of the bug at location z into its recognizer w matrix *)
var
x: integer; (* location on the genome *)
l: integer; (* location on the recognizer *)
b: base; (* base touched by a finger of recognizer at l *)
begin
(* writeln(list,'* bug = ',bug:1,' '); *)
x := z ; (* base 1 of the gene is at position z *)
with e.p.creature[bug] do begin
for l := 1 to e.p.width do begin
for b := a to t do begin
w.matrix[b,l] := decode(e,bug,x,e.p.bpi,
e.precalc.negative,e.precalc.twonegative);
x := x + e.p.bpi
(*;writeln(list,'* x=',x:2, ' w(',ord(b):1,',',l:2,') = ',val:4); *)
end;
end;
(* decode the threshold from sequence just following
the rest of the gene *)
(* WARNING: if bpi <> bpthreshold then negative and twonegative
are not correct below. See makeprecalc. *)
w.threshold := decode(e,bug,x,e.p.bpthreshold,
e.precalc.negative,e.precalc.twonegative);
(*
writeln(output,'threshold for bug ',bug:2,'=',w.threshold:3);
*)
end;
end;
(* end module ev.translate version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.score *)
function score(var w: wmatrix; width: integer;
var creature: chromosome; x: integer;
var e: everything): real;
(* Evaluate the w matrix placed on the genome at position x.
Note: x+1 is the first base touched by the w matrix. That is, x is
the zero of the w, and the first base of the w is at x+1.
Note that the score function does not imply how the recognition
occurs. That is done by the recognize function. *)
var
realval: real; (* the value of the site evaluated by w *)
integerval: integer; (* the value of the site evaluated by w *)
l: integer; (* index to w matrix *)
begin
if e.noise then with e do begin
realval := 0.0;
for l:=1 to width do begin
random(seed);
gaussian(seed, haveone, gaussianX, gaussianY);
{
writeln(output,'at score: gaussianX = ',gaussianX:10:8);
}
realval := realval + w.matrix[creature[l+x],l]
+ sigma*gaussianX;
{
writeln(output,'at score: realval = ',realval:10:8);
}
score := realval
end
end
else begin
integerval := 0;
for l:=1 to width
do integerval := integerval + w.matrix[creature[l+x],l];
score := integerval
end
end;
(* end module ev.score version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.recognize *)
function recognize(var w: wmatrix; width: integer;
var creature: chromosome; x: integer;
var e: everything): boolean;
(* Evaluate the w matrix placed on the genome at position x.
Note: x+1 is the first base touched by the w matrix. That is, x is
the zero of the w, and the first base of the w is at x+1.
The test for recognition is that score value is greater than the threshold. *)
begin
recognize := score(w, width, creature, x, e) >= w.threshold
end;
(* end module ev.recognize version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.evaluate *)
procedure evaluate(var e: everything; bug: integer);
(* evaluate the particular bug and put the number of mistakes into
the rank array. *)
var
w: wmatrix; (* the recognizer; translation of the gene *)
m: integer; (* the current number of mistakes counted *)
x: integer; (* position on the genome *)
recognized: boolean; (* a site was recognized at x *)
begin
{writeln(output,'in evaluate, bug: ',bug:1);}
(* translate the recognizer gene into the w matrix *)
translate(e,bug,1,w);
(* scan the genome *)
m := 0;
with e.p.creature[bug] do for x := 0 to e.precalc.endscang do begin
recognized := recognize(w,e.p.width,genome,x,e);
if e.p.sitelocations[x] <> recognized then m := m + 1;
end;
(* record the results *)
e.p.rank[bug,bugid] := bug; (* identify the bug *)
e.p.rank[bug,mistakes] := m
{;writeln(output, 'bug ',bug:1, ' makes ',m:1,' mistakes');}
end;
(* end module ev.evaluate version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.randomize *)
procedure randomize(var e: everything; bug: integer);
(* Instead of evaluating the bug (see procedure evaluate), assign it an
arbitrary number of mistakes. This should destroy the selection. *)
begin
e.p.rank[bug,bugid] := bug; (* identify the bug *)
random(e.seed);
e.p.rank[bug,mistakes] := round(maxbugs*e.seed)
end;
(* end module ev.randomize *)
(* begin module ev.lessthan *)
function lessthan(a, b: position): boolean;
(* is bug a less than bug b - see quicksort *)
begin
with e.p do lessthan := rank[a,mistakes] < rank[b,mistakes]
end;
(* end module ev.lessthan version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.swap *)
procedure swap(a,b: position);
(* switch the order of bugs a and b as refered to in the
rank array. see quicksort. *)
var
holdm, holdn: integer; (* variables for holding the number of
mistakes and the bug id number during the swap *)
begin
with e.p do begin
holdm := rank[a,mistakes];
holdn := rank[a,bugid];
rank[a,mistakes] := rank[b,mistakes];
rank[a,bugid] := rank[b,bugid];
rank[b,mistakes] := holdm;
rank[b,bugid] := holdn
end
end;
(* end module ev.swap version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module quicksort *)
procedure quicksort(left, right: position);
(* quick sort a list between positions left and right, into ascending order.
a position is simply a scalar of the form 0..max.
the array to be sorted is dimensioned 1..max.
(the difference in the ranges is important to the correct operation
of the sort...)
two external routines are used:
function lessthan(a, b: position): boolean is a generalized test for
value-at-a < value-at-b.
procedure swap(a, b: position) switches the items at positions a and b.
since these routines are external, the procedure is general.
this procedure taken from the book
'algorithms + data structures = programs' by niklaus wirth,
prentice-hall, inc., englewood cliffs, n.j.(1976), pp. 76-82 *)
var
lower, upper: position; (* the positions looked at currently *)
center: position; (* the rough center of the region being sorted *)
begin
lower := left;
center := (left + right) div 2;
upper := right;
repeat
while lessthan(lower, center) do lower := succ(lower);
while lessthan(center, upper) do upper := pred(upper);
if lower <= upper then begin
(* ho ho keep track of the center through the map: *)
if lower = center then center:=upper
else if upper = center then center:=lower;
swap(lower, upper);
lower := succ(lower);
upper := pred(upper)
end
until lower > upper;
if left < upper then quicksort(left, upper);
if lower < right then quicksort(lower, right)
end;
(* end module quicksort version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.dorseq *)
procedure dorseq(e: everything; k: integer; var rseq: real; var fbl: blarray);
(* calculate the information content, rseq of the sites in the k'th ranking bug.
The standard deviation is precalculated. Also return the frequency matrix. *)
var
b: base; (* one of the 4 bases, index *)
f: real; (* frequency of base *)
hs: real; (* uncertainty of site, calculated in nits *)
l: integer; (* index across a site *)
nb: array[a..t] of integer; (* numbers of bases *)
rseql: real; (* rseq at position l *)
s: integer; (* index to sites of a bug *)
begin
(*
writeln(list,'dorseq: e.precalc.ehnb=',e.precalc.ehnb:8:5);
writeln(list,'dorseq: e.precalc.hg =',e.precalc.hg:8:5);
*)
rseq := 0.0;
with e.p do begin
with creature[rank[k,bugid]] do begin
for l := 1 to width do begin (* scan across the site *)
hs := 0.0;
for b := a to t do nb[b] := 0; (* clear the array *)
(* scan across the genome for each base in the site *)
for s := 1 to gamma do begin
b := genome[l+site[s]];
nb[b] := nb[b]+1;
(*
write(list,basetochar(genome[l+site[s]]))
*)
end;
(*
writeln(list,'.');
*)
for b := a to t do begin
f := nb[b]/gamma;
fbl[b,l] := f; (* record for other uses *)
if f > 0 then hs := hs - f * ln(f);
(*
writeln(list,'nb[',basetochar(b),']=',nb[b]:1);
writeln(list,'hs = ',(hs/e.precalc.ln2):8:5,' bits');
*)
end;
(* convert to bits *)
rseql := e.precalc.ehnb - (hs / e.precalc.ln2);
(*
writeln(list,'rseql = ',(rseql):8:5,'------------');
*)
rseq := rseq + rseql
end
end
end
end;
(* end module ev.dorseq version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.dori *)
procedure dori(e: everything;
var ri: real;
s: integer; n: integer; fbl: blarray);
(* calculate the individual information content, ri
of the s'th site in the n'th ranking bug, using the frequency matrix, fbl. *)
var
b: base; (* one of the 4 bases, index *)
l: integer; (* index across a site *)
begin
with e.p do begin
with creature[rank[n,bugid]] do begin
ri := 0.0;
for l := 1 to width do begin (* scan across the site *)
b := genome[l+site[s]];
ri := ri + ln(fbl[b,l]);
end;
(* convert to bits and add on L*EHnb *)
ri := ri/e.precalc.ln2 + width * e.precalc.ehnb;
end
end
end;
(* end module ev.dori *)
(* begin module ev.dogenomich *)
procedure dogenomich(e: everything; n: integer; var hg: real);
(* do genomic h; calculate the uncertainty, hg, of the n'th creature's genome *)
var
b: base; (* one of the 4 bases *)
f: real; (* frequency of one of the bases *)
begin
hg := 0;
with e.p do with creature[rank[n,bugid]] do for b := a to t do begin
f := composition[b]/genomesize;
if f > 0 then hg := hg - f * ln(f);
end;
hg := hg/e.precalc.ln2; (* convert to bits *)
hg := hg + e.precalc.eofnforhg; (* add small sampling correction *)
end;
(* end module ev.dogenomich version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.display *)
procedure display(var list: text; var e: everything);
(* produce a display that shows what is happening *)
var
r, (* the number of mistakes one bug makes *)
x, (* the sum of the mistakes *)
xx, (* the sum of the squares of the mistakes *)
bug: integer; (* an index to the bugs *)
mean, (* the average number of mistakes *)
stdev: (* the standard deviation of the number of mistakes *)
real;
(* variables for information *)
b: base; (* index to the composition of the top bug *)
fbl: blarray; (* the f(b,l) array *)
h: real; (* genomic information *)
rseq: real; (* information in the top bug *)
ri: real; (* information in an individual site of the top bug *)
s: integer; (* index to sites *)
(* v: real; (@ for calculating the average of the ri values - test *)
begin
if e.displaystatus then write(output,' gen. ',e.generation:4);
if e.displayindividual or e.displayaverage then begin
(* show the individual bugs, and calculate their average mistates *)
if e.displayindividual then begin
for bug := 1 to e.p.bugs do begin
r := e.p.rank[bug,mistakes];
writeln(list,e.generation:4,
' i ', r:4, ' individual# ',bug:4);
end;
end;
if e.displayaverage then begin
x := 0;
xx := 0;
for bug := 1 to e.p.bugs do begin
r := e.p.rank[bug,mistakes];
x := x + r;
xx := xx + r*r;
end;
mean := x/e.p.bugs;
stdev := sqrt( (xx/e.p.bugs) - mean*mean);
(* show the average mistates *)
if e.displaystatus then write(output,
' ',mean:6:1,
'+/-',stdev:6:1);
writeln(list,e.generation:4,
' a ',mean:6:2, ' +/- ' ,stdev:6:2);
end;
end;
if e.displayonemistake then begin
write(list, e.generation:4,' mistakes: ');
writeln(list, e.p.rank[1,mistakes]:2,
' ',e.p.rank[e.p.bugs,mistakes]:2);
end;
if e.displaystatus then writeln(output, ' [',e.p.rank[1,mistakes]:2,
' ,',e.p.rank[e.p.bugs,mistakes]:2,']');
if e.displaygenomich then begin
dogenomich(e,1,h);
write(list,e.generation:4, ' g ',h:9:5);
for b := a to t
do with e.p
do write(list,' ',creature[rank[1,bugid]].composition[b]:4);
writeln(list)
end;
(*
with e.p do with creature[rank[n,bugid]] do for b := a to t do begin
f := composition[b]/genomesize;
if f > 0 then hg := hg - f * ln(f);
*)
if e.displayrsequence then begin
(* calculate the information, r, for the first individual, and show it *)
dorseq(e,1,rseq,fbl);
writeln(list,e.generation:4,
' r ',rseq:9:5,' +/- ', e.precalc.rstdev:9:5);
if e.displayorbits then begin
(*
v := 0;
*)
for s := 1 to e.p.gamma do begin
dori(e,ri,s,1,fbl); (* 1 means the top bug's information *)
writeln(list,e.generation:4,
' o ',ri:9:5,' +/- ', e.precalc.rstdev:9:5);
(*
v := v + ri;
*)
end;
(* check code; v should equal rseq
writeln(list,e.generation:4,
' v ',(v/e.p.gamma):9:5,' +/- ', e.precalc.rstdev:9:5);
*)
end
end;
(* display mistakes when they change *)
if e.displaychange then begin
if e.p.rank[1,mistakes] <> e.previousmistakes then begin
(* show Rseq if it is available *)
if not e.displayrsequence then rseq := 0.0;
writeln(list,e.generation:4, ' c ',
' ',(e.p.rank[1,mistakes]/e.p.gamma):9:5,
' ',e.p.rank[1,mistakes]:4,
' ',rseq:9:5);
e.previousmistakes := e.p.rank[1,mistakes]
end;
end;
(* the old way of displaying mistakes was on printer lines:
(@ display the mistakes made on a percentage scale @)
(@ variables for the plot @)
buffer: array[0..100] of char; (@ a line buffer ranging
from 0 to 100 percent @)
p: integer; (@ index to buffer @)
numbers: packed array[1..11] of char; (@ symbols to mark
every 10 percent interval of the plot @)
(@ clear the line buffer @)
for p := 0 to 100 do buffer[p] := ' ';
(@ put percentage marks in buffer @)
numbers := '01234567890';
p := 0;
while p <= 100 do begin
buffer[p] := numbers[(p div 10)+1];
p := p + 10
end;
(@ put average mistakes into buffer @)
buffer[round(100*mean/e.precalc.endscang)] := 'a';
(@ put the mistakes into the buffer @)
for bug := 1 to e.p.bugs
do buffer[round(100*e.p.rank[bug,mistakes]/e.precalc.endscang)] := '*';
(@ print the buffer @)
for p := 0 to 100
do write(list, buffer[p]);
writeln(list);
*)
end;
(* end module ev.display version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.reproduce *)
procedure reproduce(var e: everything);
(* Reproduce the bugs that made fewer mistakes by copying them on top
of the ones that made more mistakes. The global variable
'rank' is used to determine which these are. Since the rank array has
been sorted, all references to the bugs are through this mapping.
SPECIAL RULE: if the bugs have the same number of mistakes, reproduction
does not take place. This ensures that the quicksort algorithm does not
affect who takes over the population, and it also preserves the diversity
of the population. [1988 October 26]
Without this, the population is quickly taken over and evolution is extremely
slow!
[2001 Jun 6] implementation of specialrule.
*)
var
victim: integer; (* index to the killed in e *)
killer: integer; (* index to the killer in e *)
begin
with e.p do for killer := 1 to (bugs div 2) do begin
victim := bugs-killer+1;
if rank[victim,mistakes] = rank[killer,mistakes]
then begin
(* deal with a tie case *)
case e.specialrule of
'r': begin
(* random survival of one organism *)
{zzz}
random(e.seed);
if e.seed > 0.5
then creature[rank[victim,bugid]] := creature[rank[killer,bugid]]
else creature[rank[killer,bugid]] := creature[rank[victim,bugid]]
(* sometimes the "victim" kills the "killer"! *)
end;
'b': begin
(* both survive (original method), no change *)
end;
's': begin
(* one survives depending on the sorting algorithm. This is an
arbitrary function of how the quicksort routine works and is
therefore generally not a good way to make the decision. *)
creature[rank[victim,bugid]] := creature[rank[killer,bugid]]
end;
end;
end
else begin
(* No tie, the one with more mistakes dies! *)
creature[rank[victim,bugid]] := creature[rank[killer,bugid]]
end
{original code prior to 2001 june 6:
if rank[victim,mistakes] <> rank[killer,mistakes]
then creature[rank[victim,bugid]] := creature[rank[killer,bugid]]
}
end
end;
(* end module ev.reproduce version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.mutate *)
procedure mutate(var e: everything);
(* mutate all the creatures. *)
var
bug: integer; (* index to the bugs in e *)
hit: integer; (* the number of hits on a bug so far *)
x: integer; (* location of the hit on a bug *)
b: base; (* for storing the new base *)
begin
for bug := e.p.firstmutated to e.p.bugs
do with e.p.creature[bug] do begin
for hit := 1 to e.mu do begin
(* chose place to hit *)
random(e.seed);
x := trunc(e.seed*e.p.genomesize) + 1;
(* remove old composition *)
composition[genome[x]] := composition[genome[x]] - 1;
(* chose new base and make hit *)
makebase(e.seed,0.25,0.50,0.75,b);
genome[x] := b;
(* increment new composition *)
composition[b] := composition[b] + 1
end
end
end;
(* end module ev.mutate version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.order *)
procedure order(var list: text; var e: everything);
(* order the bugs: evaluate, sort and display *)
var
bug: position; (* index to the rank array *)
begin
if e.selecting
then for bug := 1 to e.p.bugs do evaluate(e,bug)
else for bug := 1 to e.p.bugs do randomize(e,bug);
quicksort(1,e.p.bugs);
if (e.generation mod e.displayinterval = 0)
then display(list, e);
end;
(* end module ev.order version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.culture *)
procedure culture(var list: text; var e: everything);
(* culture the bugs for one generation *)
begin
e.generation := e.generation + 1;
reproduce(e);
mutate(e);
order(list, e)
end;
(* end module ev.culture version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module ev.makemoreheader *)
procedure makemoreheader(var list: text; e: everything);
(* make the header to the list file *)
begin
write (list,'* date-time stamp: ');
writedatetime(list,e.datetimestamp);
writeln(list,'*');
writeln(list,'*');
writeln(list,'* the columns are dependent on the flag:');
writeln(list,'* a = give average number of mistakes of population');
writeln(list,'* c = flags change in number of mistakes of primary organism');
writeln(list,'* g = genomic uncertainty (bits) in primary organism');
writeln(list,'* i = give individuals mistakes');
writeln(list,'* r = information in primary organism');
writeln(list,'* m = mistakes range, from lowest to highest');
writeln(list,'* For each line, the data are as follows:');
writeln(list,'* generation# "a" average "+/-" standard-deviation');
writeln(list,'* generation# "c" c/gamma c=change-in-mistakes Rs',
' (if r is on, 0 otherwise)');
writeln(list,'* generation# "i" mistakes "individual#" rank');
writeln(list,'* generation# "g" genomic-uncertainty gna gnc gng gnt');
writeln(list,'* generation# "r" information "+/-" standard-deviation');
writeln(list,'* generation# "mistakes:" lowest highest');
writeln(list,'* ');
writeln(list,'* ');
end;
(* end module ev.makemoreheader version = 2.50; (@ of ev 1988 oct 6 *)
(* begin module putout *)
procedure putout(e: everything; var all: allfile);
(* evaluate the creatures if needed and put everything in the all file *)
var
bug: integer; (* one of the creatures *)
begin
(* For speed's sake, evaluation is not performed at each step if there is
no selection. But it is needed in the final file to allow
sensible display results. *)
if not e.selecting
then for bug := 1 to e.p.bugs do evaluate(e,bug);
puteverything(e, all);
end;
(* end module putout *)
(* begin module ev.themain *)
procedure themain(var list, evp: text; var all: allfile; var e: everything);
(* the main procedure of program ev *)
var
c: integer; (* index for cycles *)
begin (* themain *)
writeln(output,'ev ',version:4:2);
rewrite(list);
writeln(list,'* ev ',version:4:2,' evolution of sites');
getparams(all, evp, e);
listparams(list, e);
makeprecalc(e);
creation(list, e);
makemoreheader(list,e);
order(list, e);
for c := 1 to e.cycles do begin
culture(list,e);
if (c mod e.storagefrequency) = 0 then putout(e, all);
end;
putout(e, all);
end; (* themain *)
(* end module ev.themain version = 2.50; (@ of ev 1988 oct 6 *)
begin (* ev *)
themain(list,evp,all,e);
1: end. (* ev *)