diff -NbBu3r wikipedia-suggest-0.31/CHANGELOG wikipedia-suggest-0.31-modified/CHANGELOG --- wikipedia-suggest-0.31/CHANGELOG 1970-01-01 10:00:00.000000000 +1000 +++ wikipedia-suggest-0.31-modified/CHANGELOG 2006-08-14 17:28:02.000000000 +1000 @@ -0,0 +1,53 @@ + +== Version 0.31, released 12-Aug-2006 == + +* TcpQuery output is now in JSON format. +* Added experimental autocomplete feature. +* Benchmarking: TcpQuery with MemoryQuery backend and 5 + threads on my computer (Pentium D930). I used 10 threads to simulate + queries. I handled 154000 random queries in 24.7 seconds with CPU usage + of 100% (about 6234 queries per second). + +== Version 0.30, released 11-Aug-2006 == + +* Added MemoryQuery backend to TcpQuery (use the "-m" switch). +* Added multi-threading to TcpQuery (use the "-t" switch). +* Added heuristic to choose the correct redirection to keep (based on + similarity with the query) +* Handling of articles with different capitalization (keep all + different capitalizations) (e.g. "Adfa" and "ADFA"). +* Added strict mode (for PHP5), changed from 'var' to 'private' to + keep PHP5 strict mode happy, and added quick accessor method for $res + attribute. +* Removed leading "/" from paths for image directory and query.php. + This allows the web files to be placed in a subdirectory as well as + the root directory, by using relative paths instead. +* When the user made a search (e.g. "fish"), then highlighted all + their search terms, then pressed delete (so that the search field was + now blank), then pressed arrow up or arrow down, then it would show + the old results (e.g. Fishing / FishBase / etc). To prevent this added + an "if" clause to only show results when there is something in the + query field. +* Fixed problem with search for query with quotes. + +== Version 0.20, released 06-Aug-2006 == + +* Added a small heuristic inside the compiler to have only unique + articles in the best records. 
+* Added a backend in the compiler using slist to use less memory. +* Added that if the user presses 'Enter' in the search textbox whilst + typing out a query, it automatically chooses/opens/redirects to the + first item in the list. That way the user can type out what they want, and press + enter to open the first link when they have typed enough to specify it well + enough to get it to the top of the list, all without using the mouse. +* Added that the user can press the down/up arrows to select/highlight a + specified entry on the list (including but not limited to the first + item), and press enter to open it. That way again the user can be lazy + and can select a link without using the mouse, and without typing out + the full title. +* Added redirects to the index. +* Added all php/html/js pages to the source code (in the 'extra' directory). + +== Version 0.10, released 02-Aug-2006 == + +* Initial release, of a "google suggest"-like service for Wikipedia (EN + FR). diff -NbBu3r wikipedia-suggest-0.31/cmd/TcpProcessingThread.cpp wikipedia-suggest-0.31-modified/cmd/TcpProcessingThread.cpp --- wikipedia-suggest-0.31/cmd/TcpProcessingThread.cpp 2006-08-12 00:33:12.000000000 +1000 +++ wikipedia-suggest-0.31-modified/cmd/TcpProcessingThread.cpp 2006-08-14 14:00:36.000000000 +1000 @@ -122,7 +122,7 @@ { tabulars += "[\""; tabulars += backSlash(res[i]->getTitle()); - tabulars += "\", \""; + tabulars += "\",\""; std::stringstream ss; if (res[i]->isRedirection()) @@ -130,17 +130,17 @@ else ss << res[i]->getFreq(); tabulars += ss.str(); - tabulars += "\", \""; + tabulars += "\",\""; if (res[i]->isRedirection()) tabulars += backSlash(articles[res[i]->getRedirection() - 1].getTitle()); - tabulars += "\" ]"; + tabulars += "\"]"; if (i + 1 != res.size()) - tabulars += ", "; + tabulars += ","; } - return "[\"" + backSlash(query) + "\", [" + tabulars + "] ]\n\n"; + return "[" + tabulars + "]\n"; } unsigned TcpProcessingThread::nbWaiting() diff -NbBu3r 
wikipedia-suggest-0.31/cmd/TcpQuery.cpp wikipedia-suggest-0.31-modified/cmd/TcpQuery.cpp --- wikipedia-suggest-0.31/cmd/TcpQuery.cpp 2006-08-12 00:36:54.000000000 +1000 +++ wikipedia-suggest-0.31-modified/cmd/TcpQuery.cpp 2006-08-14 13:53:29.000000000 +1000 @@ -40,21 +40,21 @@ { tabulars += "[\""; tabulars += backSlash(v[i].getTitle()); - tabulars += "\", \""; + tabulars += "\",\""; std::stringstream ss; ss << v[i].getFreq(); tabulars += ss.str(); - tabulars += "\", \""; + tabulars += "\",\""; if (v[i].isRedirection()) tabulars += backSlash(v[i].getTarget()); - tabulars += "\" ]"; + tabulars += "\"]"; if (i + 1 != v.size()) - tabulars += ", "; + tabulars += ","; } - return "[\"" + backSlash(str) + "\", [" + tabulars + "] ]\n\n"; + return "[" + tabulars + "]\n"; } void version(std::ostream &os) @@ -64,7 +64,7 @@ void usage(const std::string &name, std::ostream &os) { - os << "Usage : " << name << "[Options] port fsa.bin pages.bin" << std::endl + os << "Usage : " << name << " [Options] port fsa.bin pages.bin" << std::endl << std::endl << "Options: " << std::endl << " -h Print this help message" << std::endl diff -NbBu3r wikipedia-suggest-0.31/extra/index.php wikipedia-suggest-0.31-modified/extra/index.php --- wikipedia-suggest-0.31/extra/index.php 2006-08-12 01:40:13.000000000 +1000 +++ wikipedia-suggest-0.31-modified/extra/index.php 2006-08-14 17:16:21.000000000 +1000 @@ -43,6 +43,9 @@ + diff -NbBu3r wikipedia-suggest-0.31/extra/WSuggest.js wikipedia-suggest-0.31-modified/extra/WSuggest.js --- wikipedia-suggest-0.31/extra/WSuggest.js 2006-08-12 18:59:46.000000000 +1000 +++ wikipedia-suggest-0.31-modified/extra/WSuggest.js 2006-08-14 18:39:07.000000000 +1000 @@ -5,10 +5,8 @@ var selection = 0; var objQueryResults = null; var objInput = null; -var cache = true; var query; var answers = []; -var autocomplete = 0; //Parameters var _idInput = "SuggestID"; @@ -44,7 +42,6 @@ // write content of a string in a given layer function writeLayer(objLayer, idLayer, str) { - cache 
= false; if (nn4) { objLayer.document.write(str); objLayer.document.close(); @@ -60,33 +57,27 @@ } } -// hide layer if it is currently visible -function hide(selectedLine) +// @desc: Hides the results layer if it is currently visible. +function hideResults() { - if (!cache) { - if (selectedLine > 0) { - var res = answers[query]; - } objQueryResults.visibility = "hidden"; - cache = true; - } } -function sendRes(jsonStr) +function sendRes(q, jsonStr) { var o = jsonStr.parseJSON(); - for (var i = 0; i < o[1].length; ++i) + for (var i = 0; i < o.length; i++) { - var title = o[1][i][0]; - var url = "http://en.wikipedia.org/wiki/" + titleToUrl(title); - if (o[1][i][2].length > 0) { - title += " → " + o[1][i][2]; + var title = o[i][0]; + var url = baseUrl + titleToUrl(title); + if (o[i][2].length > 0) { + title += " → " + o[i][2]; } - o[1][i][3] = title; - o[1][i][2] = url; + o[i][3] = title; + o[i][2] = url; } - answers[o[0]] = o[1]; + answers[q] = o; selection = 1; display(selection); // display QueryResults layer @@ -122,7 +113,7 @@ // display string in layer writeLayer(objQueryResults, _idQueryResults, s); } - else { hide(0); } + else { hideResults(); } } function changeHighlight(oldId, newId) @@ -152,7 +143,7 @@ } } -function webQuery(value) +function webQuery(value, showAutocomplete) { var xmlhttp = getHttp(); if (xmlhttp) { @@ -160,25 +151,35 @@ xmlhttp.onreadystatechange=function() { if (xmlhttp.readyState==4) { - sendRes(xmlhttp.responseText); - suggestNextText(); + sendRes(value,xmlhttp.responseText); + if (showAutocomplete) { suggestNextText(value); } } }; xmlhttp.send(null); } } -function selectRange (iStart, iLength) { +// @desc: Updates the text of the textbox with any autocompletion, and selects the autocompleted text. +function selectRange (oldText, iStart, iLength, newText) { + // if the text has changed in the interim, then abort any autocompletion. + if (objInput.value != oldText) { + return; + } + + // otherwise, perform autocomplete. 
Method to use depends on browser. if (objInput.createTextRange) { + objInput.value = newText; var oRange = objInput.createTextRange(); oRange.moveStart("character", iStart); oRange.moveEnd("character", iLength - objInput.value.length); oRange.select(); } else if (objInput.setSelectionRange) { + objInput.value = newText; objInput.setSelectionRange(iStart, iLength); } } +// @desc: Returns the MediaWiki article URL for a given article title. function titleToUrl(title) { var chr, url = ""; for (var i=0; i 1) { - var end; - start = objInput.value.length; - for (var i=objInput.value.length; i<=res[0][0].length; i++) { + var end, extraText = ""; + for (var i=start; i<=res[0][0].length; i++) { if (res[0][0].charAt(i) == res[1][0].charAt(i)) { - objInput.value += res[selection][0].charAt(i); - autocomplete = 1; + extraText += res[selection][0].charAt(i); } else { end = i; break; } } - selectRange(start, end); + // only autocomplete if we found something. + if (extraText.length >= 1) { + selectRange(oldText, start, end, objInput.value + extraText); + } } } @@ -222,7 +220,6 @@ var intKey = (window.Event) ? e.which : e.keyCode; var value = objInput.value; - if (intKey !== 8) { autocomplete = 0; } if (intKey == 40) // Arrow Down { if (value !== "") { @@ -240,22 +237,25 @@ else if (intKey == 13) // enter { var res = answers[query]; + // Open the first entry in the results, if there is one. if (res && res.length > 0) { window.location.href=res[selection - 1][2]; } + // Otherwise perform a Special:Search on this term. + else { window.location.href=baseUrl + "Special:Search/" + titleToUrl(value); } } - // if it is a normal character key of some description: - else if (intKey == 32 || intKey == 8 || (intKey > 46 && !(intKey >= 112 && intKey <= 123))) { - if (autocomplete == 1 && intKey == 8) { autocomplete = 0; } - else { if (value === "") { hide(0); } + // if the search term changed. 
+ else if (query != value) { + if (value === "") { hideResults(); } else { selection = 1; query = value; + // show the autocomplete only if it is a normal character key of some description: + var showAutocomplete = (intKey == 32 || (intKey > 46 && !(intKey >= 112 && intKey <= 123))); // if we already know the answer, use that, otherwise query web site. if (answers[query]) { display(selection); - suggestNextText(); + if (showAutocomplete) { suggestNextText(query); } } else { - webQuery(value); - } + webQuery(value, showAutocomplete); } } } diff -NbBu3r wikipedia-suggest-0.31/README wikipedia-suggest-0.31-modified/README --- wikipedia-suggest-0.31/README 2006-08-06 23:42:02.000000000 +1000 +++ wikipedia-suggest-0.31-modified/README 2006-08-14 14:18:39.000000000 +1000 @@ -38,3 +38,19 @@ html/javascript/php pages used on suggest.speedblue.org can be found in the extra directory +## Running TcpQuery ## + +You can start TcpQuery with a command like this: +./TcpQuery 22581 ../../EN/fsa.bin ../../EN/pages.bin +(Where "22581" is the port number to use, and the "fsa.bin" and "pages.bin" are files that either you have created + by running Analyzer, or which you have downloaded as precompiled files) + +You can then test that this is working by running a query (e.g. for "test"), like so: +echo test | netcat localhost 22581 + +If it is working, you will get back output that looks something like this: +[["Test cricket","5776",""],["Testament, New","2017","New Testament"],["Testament, Old","1700","Old Testament"], + ["Testudine","527","Turtle"],["Testosterone","355",""],["Test Messaging","289","Short message service"], + ["Testicle","276",""],["Testprog","268","Development stage"],["Testudinidae","252","Tortoise"], + ["Test subject","230","Animal testing"]] +