Remove words shorter than 4 letters from the normal search path to reduce spurious matches

This commit is contained in:
Chris Morgan
2005-08-06 03:53:08 +00:00
committed by WineHQ
parent 3a5b5f38da
commit 0665823494

View File

@@ -308,6 +308,29 @@ function outputTopXRowAppsFromRating($rating, $num_apps)
}
}
/**
 * Return true if $sWord is in the list of words to ignore.
 *
 * The match is case-insensitive: the candidate word and every entry of the
 * ignore list are passed through strtoupper() before being compared.
 *
 * @param string $sWord a single search word
 * @return bool true when the word is on the ignore list, false otherwise
 */
function isIgnoredWord($sWord)
{
    $ignore_words = array('I', 'a', 'about', 'an', 'are', 'as', 'at', 'be', 'by', 'com',
                          'de', 'en', 'for', 'from', 'how', 'in', 'is', 'it', 'la', 'of',
                          'on', 'or', 'that', 'the', 'this', 'to', 'was', 'what', 'when',
                          'where', 'who', 'will', 'with', 'und', 'www', 'game');

    /* normalise the list the same way as the candidate so the lookup ignores case */
    $upper_ignore_words = array_map('strtoupper', $ignore_words);

    /* strict comparison is safe here: both sides are always strings */
    return in_array(strtoupper($sWord), $upper_ignore_words, true);
}
/* remove common words from $search_words to improve our searching results */
function cleanupSearchWords($search_words)
@@ -316,31 +339,17 @@ function cleanupSearchWords($search_words)
/* to improve matching accuracy */
$search_words = trim($search_words);
/* Remove any of the words in the ignore_words array. these are far too common */
/* and will result in way too many matches if we leave them in */
/* We will also remove any single letter search words */
$ignore_words = array('I', 'a', 'about', 'an', 'are', 'as', 'at', 'be', 'by', 'com',
'de', 'en', 'for', 'from', 'how', 'in', 'is', 'it', 'la', 'of',
'on', 'or', 'that', 'the', 'this', 'to', 'was', 'what', 'when',
'where', 'who', 'will', 'with', 'und', 'the', 'www', 'game');
$filtered_search = "";
/* search each word in $search_words */
$split_words = split(" ", $search_words);
foreach($split_words as $key=>$value)
{
/* search each item in the $ignore_words array */
$found = false;
foreach($ignore_words as $ik=>$iv)
{
/* if we find a match we should flag it as so */
if(strtoupper($value) == strtoupper($iv))
{
$found = true;
break; /* break out of this foreach loop */
}
}
/* see if this word is in the ignore list */
/* we remove any of the words in the ignore_words array. these are far too common */
/* and will result in way too many matches if we leave them in */
/* We will also remove any single letter search words */
$found = isIgnoredWord($value);
/* remove all single letters */
if((strlen($value) == 1) && !is_numeric($value))
@@ -374,8 +383,15 @@ function searchForApplication($search_words)
/* cleanup search words */
$search_words = cleanupSearchWords($search_words);
/* split search words up so we can see if any of them match a vendor name or vendor url */
$split_words = split(" ", $search_words);
/* remove any search words less than 4 letters */
$split_words = array();
$split_search_words = split(" ", $search_words);
foreach($split_search_words as $key=>$value)
{
if(strlen($value) >= 4)
array_push($split_words, $value);
}
$vendorIdArray = array();
/* find all of the vendors whose names or urls match words in our */