#!./exu
-- File Archive Search
--
-- Searches the descriptions of over 1500 .zip files on RapidEuphoria.com
--
-- The descriptions are stored in a Euphoria EDS database, archive.edb
without type_check
include file.e
include get.e
include wildcard.e as wild
include database.e
include archive.e
constant TRUE = 1, FALSE = 0
constant TO_LOWER = 'a' - 'A'
constant MAX_HITS = 100 -- max number of matching entries to display
constant SCORE = 1, ENTRY = 2
constant OUT_CHUNK_SIZE = 10 -- number of entries to output per table
constant P_GEN = 1, P_DOS = 2, P_WIN = 3, P_LNX = 4
atom t0
t0 = time()
sequence top_hits
top_hits = {}
sequence keywords -- keywords entered by user
sequence platforms -- platforms selected by user
platforms = repeat(FALSE, 4)
integer nhits, totalCount
nhits = 0
totalCount = 0
integer log_file
log_file = -1 -- open("asearch.log", "a")
object query
query = "nothing yet"
function crash(object x)
-- in case of fire ...
integer errfile
errfile = open("ex.err", "a")
puts(errfile, "\nquery is: " & query & '\n')
close(errfile)
-- send an e-mail containing ex.err
system("mail -s \"asearch.exu crashed!\" rds@RapidEuphoria.com < ex.err > /dev/null", 2)
return 0
end function
crash_routine(routine_id("crash"))
function lower(sequence s)
-- (quickly) convert a line to lower case
integer c
for i = 1 to length(s) do
c = s[i]
if c <= 'Z' then
if c >= 'A' then
s[i] = c + TO_LOWER
end if
end if
end for
return s
end function
procedure stats()
-- save some stats for performance analysis
printf(log_file, "matched %d of %d, time: %.2f\n\n",
{nhits, totalCount, time()-t0})
end procedure
procedure html_puts(object text)
-- write HTML output
puts(1, text)
end procedure
procedure html_printf(sequence format, object text)
-- write HTML output
printf(1, format, text)
end procedure
procedure errMessage(sequence msg)
-- issue a fatal error message and quit
html_puts("<p><font face=\"verdana, arial, geneva\" size=-1 color=\"#333333\">")
html_printf("%s </font>\n</body></html>\n", {msg})
if log_file != -1 then
printf(log_file, "%s\n", {msg})
stats()
close(log_file)
end if
abort(1)
end procedure
sequence LETTER
LETTER = repeat(FALSE, 256)
for c = 0 to 255 do
if (c >= '0' and c <= '9') or (c >= 'a' and c <= 'z') then
LETTER[c+1] = TRUE
end if
end for
function hits(sequence line, sequence word, integer n)
-- find out how many matches of word there are in a line.
-- if length(word) is <= n, then we need whole-word match, not a substring
integer len, white_before, white_after, p
atom count
count = 0
while TRUE do
p = match(word, line) -- should fail 99% of the time
if p = 0 then
return count
end if
len = length(word)
if (p = 1) or not LETTER[1+line[p-1]] then
white_before = TRUE
else
white_before = FALSE
end if
if (p + len > length(line)) or not LETTER[1+line[p+len]] then
white_after = TRUE
else
white_after = FALSE
end if
if len <= n then
-- short word - need clear match
if white_before and white_after then
count += 1
end if
else
-- long word - could be a substring, but reduce score
count += 1 - 0.2 * (not white_before) - 0.2 * (not white_after)
end if
line = line[p+len..length(line)]
end while
return count
end function
procedure AddHit(sequence counts, integer rec_num)
-- record the score for a entry
-- keep track of the best scores and entries
atom score
integer p
score = 0.0
if length(counts) = 0 then
-- no keywords were specified
score = 1
else
for i = 1 to length(counts) do
score += sqrt(counts[i])
end for
if score = 0.0 then
return
end if
end if
nhits += 1
if length(top_hits) = 0 then
top_hits = {{score, rec_num}}
else
p = 0
for i = length(top_hits) to 1 by -1 do
if score <= top_hits[i][SCORE] then
p = i
exit
end if
end for
if p != MAX_HITS then
top_hits = append(top_hits[1..p], {score, rec_num}) &
top_hits[p+1..length(top_hits)]
if length(top_hits) > MAX_HITS then
top_hits = top_hits[1..length(top_hits)-1]
end if
end if
end if
end procedure
procedure scan(integer rec_num, sequence keywords, sequence platforms)
-- scan one entry for all possible keywords
sequence counts, href
sequence entry, text
atom bonus
integer p
entry = db_record_data(rec_num)
if not platforms[entry[A_PLATFORM]+1] then
return
end if
if equal(entry[A_CATEGORY], "hide*") then
return
end if
-- only consider filename, not full path
href = entry[A_HREF]
p = length(href)
while p > 0 and href[p] != '/' do
p -= 1
end while
href = href[p+1..length(href)]
text = {entry[A_TITLE], entry[A_NAME],
entry[A_DESCRIPTION], href}
counts = repeat(0, length(keywords))
for i = 1 to length(text) do
text[i] = lower(text[i])
if i <= 2 then
bonus = 1.5 -- hit on title or name
else
bonus = 1.0
end if
bonus *= 1 + sqrt(entry[A_MONEY])/200.0 + (entry[A_YEAR]-1996)*.01 +
entry[A_MONTH]*.01/12.0
for j = 1 to length(keywords) do
counts[j] += bonus * hits(text[i], keywords[j], 3)
end for
end for
AddHit(counts, rec_num)
end procedure
procedure search()
-- top level search
keywords = wild:lower(keywords) -- general lower
if db_open("ARCHIVE.EDB", DB_LOCK_SHARED) != DB_OK then
errMessage("Can't open ARCHIVE.EDB")
end if
if db_select_table("archive") != DB_OK then
errMessage("Can't open archive table")
end if
totalCount = db_table_size()
for i = 1 to totalCount do
scan(i, keywords, platforms)
end for
end procedure
procedure top_link(sequence percent, sequence name, sequence url, sequence w)
-- HTML for one top link
html_puts("<td bgcolor=\"#FFCC66\" width=\"" & percent &
"%\" align=center height=15 valign=bottom>\n")
html_puts("<font face=\"verdana, arial, geneva\" size=-2>\n")
html_puts("<a class=\"toplink\" href=\"" & url & "\"><b>" & name &
"</b></a></font></td>\n")
html_puts("<td bgcolor=\"#FFCC66\" width=\"1%\" valign=top align=right>\n")
html_puts("<img src=\"topcr.gif\" width=5 height=5></td>\n")
if length(w) > 0 then
html_puts("<td bgcolor=\"#FFFFFF\" width=" & w &
"><img src=\"dum.gif\" width=1 height=1></td>\n")
end if
end procedure
procedure top_links()
-- display the top links
html_puts("<table border=0 cellpadding=0 cellspacing=0 width=\"100%\">\n")
html_puts("<tr valign=top>\n")
top_link("7", "Home", "index.html", "1")
top_link("16", "What Is Euphoria?", "hotnew.htm", "1")
top_link("14", "Documentation", "manual.htm", "1")
top_link("7", "News", "news.htm", "1")
top_link("14", "EUforum", "listserv.htm", "1")
top_link("17", "Download Euphoria", "v20.htm", "1")
top_link("18", "Instant Registration!", "reg.htm", "")
html_puts("</tr>\n")
html_puts("<tr>\n")
html_puts("<td colspan=2 bgcolor=\"#FFCC66\" height=3>\n")
html_puts("<img src=\"dum.gif\" width=1 height=1></td>\n")
for i = 1 to 6 do
html_puts("<td bgcolor=\"#FFFFFF\"><img src=\"dum.gif\" width=1 height=1></td>\n")
html_puts("<td colspan=2 bgcolor=\"#FFCC66\">\n")
html_puts("<img src=\"dum.gif\" width=1 height=1></td>\n")
end for
html_puts("</tr>\n")
html_puts("<tr>\n")
html_puts("<td colspan=20 bgcolor=\"#FFFFFF\" height=3>\n")
html_puts("<img src=\"dum.gif\" width=1 height=1></td>\n")
html_puts("</tr>\n")
html_puts("</table>\n")
html_puts("<table border=0 cellpadding=0 cellspacing=0 width=\"100%\">\n")
html_puts("<tr valign=top>\n")
top_link("23", "Recent User Contributions", "contrib.htm", "1")
top_link("12", "The Archive", "archive.htm", "1")
top_link("22", "Other Euphoria Web Sites", "othersit.htm", "1")
top_link("16", "RDS Development", "contract.htm", "1")
top_link("22", "Related Books & Software", "books.htm", "")
html_puts("</tr>\n")
html_puts("<tr>\n")
html_puts("<td colspan=2 bgcolor=\"#FFCC66\" height=3>\n")
for i = 1 to 4 do
html_puts("<img src=\"dum.gif\" width=1 height=1></td>\n")
html_puts("<td bgcolor=\"#FFFFFF\"><img src=\"dum.gif\" width=1 height=1></td>\n")
html_puts("<td colspan=2 bgcolor=\"#FFCC66\">\n")
end for
html_puts("<img src=\"dum.gif\" width=1 height=1></td>\n")
html_puts("</tr>\n")
html_puts("</table>\n")
end procedure
procedure htmlHeader1()
-- First batch of HTML
html_puts("Content-type: text/html\n\n")
html_puts("<html><head>\n")
html_puts("<title>search results for Euphoria File Archive</title>\n")
html_puts("<base href=\"http://www.RapidEuphoria.com/\">\n")
html_puts("<link REL=StyleSheet HREF=\"global.css\" TYPE=\"text/css\" MEDIA=screen>\n")
html_puts("</head>\n")
html_puts("<body bgcolor=\"#FFFFFF\" link=\"#003366\"" &
" vlink=\"#006699\" text=\"#000000\">\n")
html_puts("<basefont size=3>\n\n")
top_links()
html_puts("<table border=0 cellpadding=0 cellspacing=0 width=\"100%\">\n")
html_puts("<tr valign=top>\n")
html_puts("<td width=\"95%\"></td>\n")
html_puts("<td></td>\n")
html_puts("</tr>\n")
html_puts("<tr>\n")
html_puts("<td colspan=2 height=7><img src=\"dum.gif\" width=1 height=1></td>\n")
html_puts("</tr>\n")
html_puts("<tr>\n")
html_puts("<td colspan=2 bgcolor=\"#CCCC99\"><img src=\"dum.gif\" width=1 height=1></td>\n")
html_puts("</tr>\n")
html_puts("<tr>\n")
html_puts("<td colspan=2 height=7><img src=\"dum.gif\" width=1 height=1></td>\n")
html_puts("</tr>\n")
html_puts("<tr>\n")
html_puts("<td align=center>\n")
html_puts("<font face=\"Arial, Helvetica\" color=\"#CC3366\" size=+2><b>\n")
html_puts("Search Results</b></font></td>\n")
html_puts("<td></td>\n")
html_puts("</tr>\n")
html_puts("</table>\n\n")
end procedure
constant months = {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}
procedure htmlHeader2()
-- second batch of HTML
html_puts("<p>\n")
html_puts("<form method=GET action=\"cgi-bin/asearch.exu\">\n\n")
html_puts("<table border=1 cellpadding=0 cellspacing=0 width=\"100%\">\n")
html_puts("<tr bgcolor=\"#FFFFEE\">\n")
html_puts("<td>\n")
html_puts("<table border=0 cellpadding=0 cellspacing=0 width=\"100%\">\n")
html_puts("<tr>\n")
html_puts("<td width=\"9%\"></td>\n")
html_puts("<td width=\"21%\"></td>\n")
html_puts("<td width=\"45%\"></td>\n")
html_puts("<td width=\"25%\"></td>\n")
html_puts("</tr>\n")
html_puts("<tr>\n")
html_puts("<td colspan=4 height=7><img src=\"dum.gif\" width=1 height=1></td>\n")
html_puts("</tr>\n")
html_puts("<tr>\n")
html_puts("<td></td>\n")
html_puts("<td rowspan=3><font face=\"Arial, Helvetica\" size=3 color=\"#990033\">\n")
html_puts("<b>Search Again:</b></font>\n")
html_puts("</td>\n")
html_puts("<td>\n")
html_puts("<font face=\"Arial, Helvetica\" size=-2>\n")
html_puts("<input type=\"CHECKBOX\" name=\"dos\"")
if platforms[P_DOS] then
html_puts("CHECKED")
end if
html_puts("><font color=\"#FF8080\"><b>DOS</b> </font>\n")
html_puts("<input type=\"CHECKBOX\" name=\"win\"")
if platforms[P_WIN] then
html_puts("CHECKED")
end if
html_puts("><font color=\"#8080FF\"><b>WIN</b> </font>\n")
html_puts("<input type=\"CHECKBOX\" name=\"lnx\"")
if platforms[P_LNX] then
html_puts("CHECKED")
end if
html_puts("><font color=\"#60EE60\"><b>LNX</b> </font>\n")
html_puts("<input type=\"CHECKBOX\" name=\"gen\"")
if platforms[P_GEN] then
html_puts("CHECKED")
end if
html_puts("><font color=\"#808080\"><b>GEN</b></font>\n")
html_puts("</font>\n")
html_puts("<td align=center><font face=\"Comic Sans MS\" size=-2><i>\n")
html_puts("<a href=\"asearch.txt\">Powered by Euphoria</a></i></font>\n")
html_puts("</td>\n")
html_puts("</tr>\n")
html_puts("<tr>\n")
html_puts("<td height=3><img src=\"dum.gif\" width=1 height=1></td>\n")
html_puts("<td colspan=2><img src=\"dum.gif\" width=1 height=1></td>\n")
html_puts("</tr>\n")
html_puts("<tr>\n")
html_puts("<td></td>\n")
html_puts("<td>\n")
html_puts("<input type=\"text\" name=\"keywords\" size=38 \n")
html_puts("value=\"")
for i = 1 to length(keywords) do
for j = 1 to length(keywords[i]) do
if keywords[i][j] = '"' then
html_puts(""")
else
html_puts(keywords[i][j])
end if
end for
if i != length(keywords) then
html_puts(" ") --"+")
end if
end for
html_puts("\">\n")
html_puts("</td>\n")
html_puts("<td>\n")
html_puts("<input type=\"submit\" value=\"Search!\"></td>\n")
html_puts("</tr>\n")
html_puts("<tr>\n")
html_puts("<td colspan=4 height=3><img src=\"dum.gif\" width=1 height=1></td>\n")
html_puts("</tr>\n")
html_puts("<tr>\n")
html_puts("<td colspan=2></td>\n")
html_puts("<td colspan=2>\n")
html_puts("<font face=\"verdana, arial, geneva\" size=-2 color=\"#333333\">\n")
html_puts("Type one or more words.\n")
html_puts("</font></td>\n")
html_puts("</tr>\n")
html_puts("<tr>\n")
html_puts("<td colspan=4 height=7><img src=\"dum.gif\" width=1 height=1></td>\n")
html_puts("</tr>\n")
html_puts("</table>\n")
html_puts("</td>\n")
html_puts("</tr>\n")
html_puts("</table>\n")
html_puts("</form>\n")
html_puts("<p>\n")
end procedure
procedure printTabHead()
-- HTML for table of messages
html_puts("<table border=0 width=\"100%\" cellpadding=0 cellspacing=0>\n")
html_puts("<tr>\n")
html_puts("<td width=\"7%\" nowrap></td>\n")
html_puts("<td width=\"31%\" nowrap></td>\n")
html_puts("<td width=\"6%\" nowrap></td>\n")
html_puts("<td width=\"23%\" nowrap></td>\n")
html_puts("<td width=\"19%\" nowrap></td>\n")
html_puts("<td width=\"9%\" nowrap></td>\n")
html_puts("<td width=\"4%\" nowrap></td>\n")
html_puts("<td width=\"1%\" nowrap></td>\n")
html_puts("</tr>\n")
end procedure
function recent(integer month_num, integer year_num)
-- Is the date in the past 6 months (roughly)?
integer i, j
sequence current_date
i = year_num * 12 + month_num
current_date = date()
j = (current_date[1] + 1900) * 12 + current_date[2]
if j - i <= 6 then
return TRUE
else
return FALSE
end if
end function
function strip_hash(sequence desc)
-- remove separator between main and update descriptions
integer m
m = match("## ", desc)
if m then
desc = desc[1..m-1] & desc[m+3..length(desc)]
end if
return desc
end function
procedure display_entry(integer rec_num)
-- display an archive entry using HTML
sequence entry, thedate, desc
entry = db_record_data(rec_num)
html_puts("<tr bgcolor=")
if entry[A_PLATFORM] = 0 then
html_puts("\"#C0C0C0\"")
elsif entry[A_PLATFORM] = 1 then
html_puts("\"#FFCCCC\"")
elsif entry[A_PLATFORM] = 2 then
html_puts("\"#CCCCFF\"")
else
html_puts("\"#BBEEBB\"")
end if
html_puts("><td><font color=\"FFFFFF\"\nface=\"Arial\"><b>")
if entry[A_PLATFORM] = 0 then
html_puts("GEN")
elsif entry[A_PLATFORM] = 1 then
html_puts("DOS")
elsif entry[A_PLATFORM] = 2 then
html_puts("WIN")
else
html_puts("LNX")
end if
html_puts("</b></font></td>\n<td><font face=\"Arial\" size=2><a href=\"" &
entry[A_HREF] & "\">")
html_puts("<b>" & entry[A_TITLE] & "</b></a></font></td>\n")
html_printf("<td align=right><font face=\"Arial\" size=2>%dK</font></td>",
entry[A_SIZE])
html_puts("<td align=center><font face=\"Arial\" size=2><b>" &
entry[A_NAME] & "</b></font></td>\n")
html_puts("<td><font face=\"Arial\" size=2>")
thedate = sprintf("%s %d/%02d", {months[entry[A_MONTH]], entry[A_DAY],
remainder(entry[A_YEAR], 100)})
if entry[A_UPDATED] then
if recent(entry[A_YEAR], entry[A_MONTH]) then
html_puts("<font color=\"#5500FF\">updated</font> " & thedate)
else
html_puts(thedate)
end if
else
html_puts(thedate)
end if
html_puts("</font></td>\n")
if entry[A_MONEY] = 0 then
html_puts("<td> </td>\n")
else
html_printf("<td><img src=\"happy.gif\" alt=\"this program has happy users!\" width=17 height=17>" &
"<font face=\"Arial\" size=2> %.2f</font></td>\n",
entry[A_MONEY]/100)
end if
html_puts("<td valign=middle align=right>" &
"<a href=\"cgi-bin/usercont.exu?dbId=" &
sprintf("%d\" target=\"_blank\">", db_record_key(rec_num)) &
"<img src=\"upload.gif\" alt=\"click here to update this entry\"border=\"0\" width=11 height=11></a>" &
"</td>\n")
html_puts("<td></td></tr>\n")
desc = entry[A_DESCRIPTION]
if entry[A_UPDATED] then
desc = strip_hash(desc)
end if
html_puts("<tr><td colspan=7><font face=\"Arial\" size=2>" &
desc & "\n") -- '\n' meaningful
html_puts("</font></td><td></td></tr>")
html_puts("<tr><td colspan=8> </td></tr>\n\n")
end procedure
procedure printResult()
-- output the entries that match
integer t, tabSize, numLeft, numIteration
if length(top_hits) = 0 then
errMessage("No match found. Try again.")
html_puts("\n</body></html>\n")
return
end if
puts(1,
"<p><font face=\"verdana, arial, geneva\" size=-1 color=\"#333333\">")
html_printf("matched %d of %d entries", {nhits, totalCount})
if nhits > MAX_HITS then
html_printf(" - displaying the best %d", MAX_HITS)
end if
html_puts("</font>\n<p>\n")
tabSize = OUT_CHUNK_SIZE -- number of entries in one table for speed
if nhits < MAX_HITS then
numLeft = nhits -- remaining number of entries still to be printed
else
numLeft = MAX_HITS
end if
t = 1
while numLeft > 0 do
if numLeft <= tabSize then
numIteration = numLeft
numLeft = 0
else
numIteration = tabSize
numLeft -= tabSize
end if
-- Web browser must receive a complete table before it displays anything
printTabHead()
for k = 1 to numIteration do
display_entry(top_hits[t][ENTRY])
t += 1
end for
html_puts("\n</table>\n")
end while
html_puts("<p> <br>\n")
html_puts("<font face=\"verdana, arial, geneva\" size=2 color=\"#333333\">")
html_puts("<center>End of Search Results</center></font>\n")
html_puts("\n</body></html>\n")
end procedure
function hex_char(integer c)
-- is c a valid hex character?
return find(c, "0123456789ABCDEFabcdef")
end function
function hex_val(integer c)
-- return value of a hex character
if c >= 'A' and c <= 'F' then
return 10 + c - 'A'
elsif c >= 'a' and c <= 'f' then
return 10 + c - 'a'
else
return c - '0'
end if
end function
function parse_input(sequence s)
-- crack the syntax sent from Web browser: aaa=bbb&ccc=ddd&...
-- Convert to {{"aaa", "bbb"}, {"ccc", "ddd"}, ...} left-right pairs
integer i, c
sequence word_pairs, left_word, right_word
word_pairs = {}
i = 1
s &= {0,0,0} -- end markers
while s[i] != 0 do
left_word = ""
while 1 do
-- build left word
c = s[i]
if c = '=' or c = '&' or c = 0 then
exit
end if
if c = '%' and hex_char(s[i+1]) and hex_char(s[i+2]) then
c = 16 * hex_val(s[i+1]) + hex_val(s[i+2])
i += 2
elsif c = '+' then
c = ' '
end if
left_word &= c
i += 1
end while
i += 1
right_word = ""
while 1 do
-- build right word
c = s[i]
if c = '&' or c = 0 then
exit
end if
if c = '%' and hex_char(s[i+1]) and hex_char(s[i+2]) then
c = 16 * hex_val(s[i+1]) + hex_val(s[i+2])
i += 2
elsif c = '+' then
c = ' '
end if
right_word &= c
i += 1
end while
i += 1
word_pairs = append(word_pairs, {left_word, right_word})
end while
return word_pairs
end function
function getKeywords()
-- get values from the CGI query string, e.g.
-- dos=on&keywords=apple+orange
sequence keystring
sequence key, pairs, var, val, d
query = getenv("QUERY_STRING")
if atom(query) then
query = getenv("query_string")
if atom(query) then
errMessage("Internal Error - no query_string")
end if
end if
if log_file != -1 then
d = date()
d[1] += 1900
printf(log_file, "%d-%d-%d %d:%02d\n%s\n", append(d[1..5], query))
flush(log_file)
end if
pairs = parse_input(query)
keystring = ""
for i = 1 to length(pairs) do
var = lower(pairs[i][1])
val = pairs[i][2]
if equal(var, "dos") then
platforms[P_DOS] = TRUE
elsif equal(var, "win") then
platforms[P_WIN] = TRUE
elsif equal(var, "lnx") then
platforms[P_LNX] = TRUE
elsif equal(var, "gen") then
platforms[P_GEN] = TRUE
elsif equal(var, "keywords") then
keystring = val
end if
end for
keywords = {}
if length(keystring) = 0 then
return "Enter one or more words for searching. Try again."
end if
-- make list of keywords from keystring
key = ""
keystring &= ' '
for i = 1 to length(keystring) do
if keystring[i] = ' ' then
if length(key) then
keywords = append(keywords, key)
key = ""
end if
elsif keystring[i] != '"' then
key = append(key, keystring[i])
end if
end for
if length(keywords) = 0 then
return "Type one or more keywords for search. Try again."
end if
return ""
end function
sequence msg
htmlHeader1()
msg = getKeywords()
htmlHeader2()
if length(msg) > 0 then
errMessage(msg)
end if
flush(1)
search()
printResult()
if log_file != -1 then
stats()
close(log_file)
end if
db_close()