求站内搜索算法

来源:百度文库 编辑:杭州交通信息网 时间:2024/04/27 16:12:47
怎样更好地实现站内搜索?

一个例子:From CSDN
# search - run the boolean query in $FORM{'terms'} against every file in @FILES.
#
# Query syntax:
#   word            optional term: a file is listed if any optional term matches
#   +word, AND      required term: every required term must match
#   -word, NOT      forbidden term: any match removes the file
#   OR              ignored (optional is the default)
#   "two words"     phrase, matched as one term; and/or/not inside it are literal
#   $word           substring match (no word-boundary check)
#   *               anywhere in the query: every file is listed
# A term without uppercase ASCII letters is matched case-insensitively; a term
# with at least one uppercase letter is matched case-sensitively.
#
# Reads package globals:   %FORM, @FILES
# Writes package globals:  @founds (appended), $hitcount (incremented),
#                          %titles, %include, @required, @forbidden, @optional,
#                          $check, $required_terms_present,
#                          $forbidden_terms_present, and $FORM{'terms'} (replaced
#                          by the normalised query).
# Takes no arguments and returns nothing useful.
sub search {
    # Rebuild the parse state on every call so an earlier search cannot leak
    # terms or flags into this one.
    @required                = ();
    @forbidden               = ();
    @optional                = ();
    $required_terms_present  = '';
    $forbidden_terms_present = '';

    # Collapse runs of whitespace and pad both ends so that operators at the
    # start or end of the query are still surrounded by blanks.
    my $query = defined $FORM{'terms'} ? $FORM{'terms'} : '';
    $query =~ s/\s+/ /g;
    $query = " $query ";

    # A wildcard disables term checking altogether: every file is listed.
    $check = ($query =~ /\*/) ? '' : 'true';

    # Mask the blanks inside "quoted phrases" first, so that neither the
    # operator rewriting nor the final split on whitespace can break them up.
    # Chunks alternate outside/inside quotes; the padding guarantees that the
    # first chunk is always outside.
    my $placeholder = '%%%==%%%';
    my @chunks      = split /"/, $query;
    my $in_phrase   = 0;
    my $masked      = '';
    for my $chunk (@chunks) {
        $chunk =~ s/ /$placeholder/g if $in_phrase;
        $masked .= $chunk;
        $in_phrase = !$in_phrase;
    }

    # Rewrite the word operators: NOT -> '-', AND -> '+', OR -> nothing.
    $masked =~ s/ not / -/ig;
    $masked =~ s/ and / +/ig;
    $masked =~ s/ or / /ig;
    $FORM{'terms'} = $masked;

    # Turn every term into a regex source string and file it by kind.
    for my $raw (split /\s+/, $masked) {
        next if $raw eq '';    # leading empty field produced by the padding

        (my $word = $raw) =~ s/\Q$placeholder\E/ /g;    # unmask phrases

        my $bucket = \@optional;
        if ($word =~ s/^\+//) {
            $bucket = \@required;
        }
        elsif ($word =~ s/^-//) {
            $bucket = \@forbidden;
        }

        # A leading '$' asks for a plain substring match.
        my $substring = ($word =~ s/^\$//) ? 1 : 0;
        next if $word eq '';    # a bare operator carries no term

        # quotemeta keeps user input from being read as regex syntax. The
        # lookarounds act like the non-word characters on either side of a
        # whole word, but also succeed at the very start and end of the text.
        my $pattern = quotemeta $word;
        $pattern = '(?<!\w)' . $pattern . '(?!\w)' unless $substring;
        push @{$bucket}, $pattern;
    }
    $required_terms_present  = 'you bet'  if @required;
    $forbidden_terms_present = 'fraid so' if @forbidden;

    # True when $pattern occurs in $text. The wrappers added above contain no
    # uppercase letters, so any uppercase letter comes from the user's term.
    my $term_matches = sub {
        my ($text, $pattern) = @_;
        if ($pattern =~ /[A-Z]/) {
            return ($text =~ /$pattern/) ? 1 : 0;
        }
        return ($text =~ /$pattern/i) ? 1 : 0;
    };

    for my $path (@FILES) {
        # An unreadable file is treated as empty rather than aborting the
        # whole search. Three-argument open keeps odd file names from being
        # interpreted as open modes.
        my $text = '';
        if (open my $fh, '<', $path) {
            $text = join ' ', <$fh>;
            close $fh;
        }
        $text =~ s/\n//g;

        # Extract the title, if there is one (non-greedy, so the capture stops
        # at the first closing tag pair); otherwise fall back to the file name.
        if ($text =~ /<font face="楷体_GB2312" size="5" color="#0000FF">(.*?)<\/font><\/b>/i) {
            $titles{$path} = $1;
        }
        unless ($titles{$path}) {
            my ($basename) = ($path =~ /.*\/(.*)/);
            $titles{$path} =
                (defined $basename && $basename ne '') ? $basename : $path;
        }

        # Terms are matched against the text with the markup removed.
        $text =~ s/<[^>]*>//g;

        # Start from a clean verdict for this file.
        delete $include{$path};

        if ($check) {
            # Any optional term is enough to list the file ...
            for my $term (@optional) {
                $include{$path} = 'yes' if $term_matches->($text, $term);
            }

            # ... every required term must be present ...
            for my $term (@required) {
                if ($term_matches->($text, $term)) {
                    $include{$path} = 'yes';
                }
                else {
                    $include{$path} = 'no';
                    last;
                }
            }

            # ... and a single forbidden term vetoes it.
            for my $term (@forbidden) {
                if ($term_matches->($text, $term)) {
                    $include{$path} = 'no';
                    last;
                }
            }
        }
        else {
            # Wildcard query: list everything.
            $include{$path} = 'yes';
        }

        if (defined $include{$path} && $include{$path} eq 'yes') {
            push @founds, $path;
            $hitcount++;
        }
    }    # End loop through all files.

    return;
}    # End search procedure.

不推荐上面的方法,太复杂了;简单点,直接用 Yahoo! 的站内搜索:
<!-- SiteSearch Yahoo: search form that submits the query to Yahoo via GET
     and shows the results in a separate window named "yahoo_window". -->
<form method="get" action="http://search.cn.yahoo.com/search" target="yahoo_window">
<p>
<!-- Query box: sent as parameter "p", limited to 55 characters. -->
<input type="text" name="p" size="20" maxlength="55" value=""></input><br />
<!-- Scope selector "vs": empty value for the whole Web, or a domain name.
     NOTE(review): presumably Yahoo restricts results to the domain given in
     "vs"; replace chedong.com with your own site's domain. -->
<input type="radio" name="vs" value=""></input><a>Web</a>
<input type="radio" name="vs" value="chedong.com" checked="checked"></input><a>chedong.com</a><br />
<input type="submit" name="sa"></input><br />
</p>
</form>
<!-- End of SiteSearch Yahoo -->
如果喜欢单独的搜索引擎,给你个地址:
http://down.chinaz.com/query.asp?keyword=winrar&action=title