在php中的文件(doc,docx,xlsx,pdf)中按短语搜索



到目前为止,我已经创建了一个具有搜索功能的网站。用户可以上传到数据库中的文件有不同的扩展名(docx、doc、pdf等)。所以我需要根据文件的内容来搜索它。到目前为止,我无法正确搜索它。我做了两个功能1.按文件名搜索2.按短语搜索

按名称搜索非常有效,但按短语搜索部分存在问题。我可以将这些文件转换为文本文件。但我不知道为什么我不能在那个文件中搜索。那么,有人能告诉我哪里错了吗?或者为我提供另一个解决方案。

这是代码。。。。

homepage.php

<!-- Search form: posts the query text and the selected search mode to search1.php. -->
<form method="post" action="search1.php" class="container 50%" id="searchform">
<input type="text" name="name" placeholder="Enter the terms you wish to search for" />
<input type="submit" name="submit" value="Search" class="fit special" />
<!-- Each radio button gets a label; without one the two search modes are indistinguishable. -->
<input type="radio" id="name" name="search" value="name" class="fit special" />
<label for="name">Search by file name</label>
<input type="radio" id="phrase" name="search" value="phrase" class="fit special" />
<label for="phrase">Search by phrase</label>
</form>

search1.php

<?php require_once("/includes/functions.php"); ?>
<?php require_once("/includes/class.php"); ?>
<?php
// Connection settings for the MySQL server on localhost.
$dbhost = "localhost";
$dbuser = "root";
$dbpass = "sandeep";
$dbname = "dbtuts";

// Open the default connection; stop the request with a short message if that fails.
if (!mysql_connect($dbhost, $dbuser, $dbpass)) {
    die('cannot connect to the server');
}

// Select the working database on that connection, again stopping on failure.
if (!mysql_select_db($dbname)) {
    die('database selection problem');
}
?>
<!DOCTYPE html>
<html>
<head>
<title>SEARCHED FILES</title>
<link rel="stylesheet" href="assets/css/main.css" />
</head>
<body>
<section>   
<div class="table-wrapper">
<table class="alt">
<thead>
<tr>
<th>File Name</th>
<th>View</th>
</tr>
</thead>    
<?php 
// Search request handler. Runs only when the form was submitted, then dispatches on the
// selected mode: "phrase" scans the text of every stored file, any other value matches
// the search term against file names. Each hit is printed as one table row; every
// failure path emits a JavaScript alert and redirects back to homepage.php.
if(isset($_POST['submit'])){ 
$name=$_POST['name']; 
// Loose comparison: an empty search box ("") compares equal to NULL and is rejected.
if($name!=NULL)
{
if (!empty($_POST['search'])) {
if ($_POST['search']=="phrase") { //search by phrase
$searchthis = $name;
$matches = array();
// Build a single UNION query that collects the `file` column of all seven tables.
$query  = "SELECT file from ada ";
$query .= "UNION ";
$query .= "SELECT file from cdr ";
$query .= "UNION ";
$query .= "SELECT file from others ";
$query .= "UNION ";
$query .= "SELECT file from pdr ";
$query .= "UNION ";
$query .= "SELECT file from rr ";
$query .= "UNION ";
$query .= "SELECT file from sdd ";
$query .= "UNION ";
$query .= "SELECT file from tbl_uploads ";
$result = mysql_query($query);
// sample.txt is opened for writing once, before the loop, so the same write handle is
// reused for every file. NOTE(review): this handle is never closed in this script.
$new_file = fopen("sample.txt","w") or die("Unable to open file!!");
while($row=mysql_fetch_array($result))
{
// NOTE(review): the two string literals below look damaged (backslashes appear to have
// been lost when the snippet was copied) - confirm against the original code.
$filepath = getcwd() . "uploads\".$row['file'];
$path = str_replace('//', '\', $filepath);
// Convert the stored document to plain text and append it to sample.txt.
$Obj = new DocxConversion($path);
$Text= $Obj->convertToText();
fwrite($new_file,$Text);
echo $new_file."<br/>";
// NOTE(review): $new_file holds the stream resource returned by fopen() above, not a
// file name, so passing it to fopen() cannot reopen sample.txt for reading.
$handle = fopen($new_file, "r");
if ($handle)
{
// Scan line by line; the first line containing the phrase records the file as a match.
while (!feof($handle))
{
$buffer = fgets($handle);
if(strpos($buffer, $searchthis) !== FALSE)
{
$matches[] = $row['file'];
break;
}
}
fclose($handle);
}
}
// array_filter() without a callback drops empty/falsy entries from the match list.
$matches = array_filter($matches);
if (!empty($matches)) 
{
// Print one table row per matching file, with a link into uploads/.
foreach($matches as $row)
{
?>
<tr>
<td><?php echo $row ?></td>
<td><a href="uploads/<?php echo $row ?>" target="_blank">view file</a></td>
</tr>
<?php
}
}
else
{
//echo " Phrase not found!!!";
?>
<script>
alert('Phrase not Found');
window.location.href='homepage.php';
</script>
<?php
}
}
else{                              //search by name
// Tables to search; only the values (table names) are used by the loop below.
$array = array(
"db1" => "ada",
"db2" => "cdr",
"db3" => "others",
"db4" => "pdr",
"db5" => "rr",
"db6" => "sdd",
"db7" => "tbl_uploads",
);
// NOTE(review): a connection with the same credentials is already opened at the top of
// this script; this second connect looks redundant.
//connect  to the database 
$db=mysql_connect("localhost","root","sandeep") or die ('I cannot connect to the database  because:'.mysql_error()); 
//-select  the database to use 
$mydb=mysql_select_db("dbtuts"); 
// Becomes true as soon as any table yields at least one row.
$no_of_access = false;
// Walk the table list with the array's internal pointer (current()/next()).
while ($db_name = current($array)) 
{  
//-query  the database table 
// NOTE(review): $name comes straight from $_POST and is interpolated into the SQL
// text without escaping.
$sql = "SELECT * FROM $db_name WHERE (file LIKE '%$name%')";
//-run  the query against the mysql query function 
$result=mysql_query($sql); 
$num_rows = mysql_num_rows($result);
if($num_rows > 0)
{
//-create  while loop and loop through result set 
$no_of_access = true;
while($row=mysql_fetch_array($result))
{
?>
<tr>
<td><?php echo $row['file'] ?></td>
<td><a href="uploads/<?php echo $row['file'] ?>" target="_blank">view file</a></td>
</tr>
<?php
}
}
else 
{
// Report "not found" only when the current table is tbl_uploads (the last entry of
// $array), it returned no rows, and no earlier table produced a hit.
if(!$no_of_access && $db_name == "tbl_uploads")
//echo "<p> Result not found!!<p>";
{
?>
<script>
alert('Result Not Found!!');
window.location.href='homepage.php';
</script>
<?php
}
}
next($array);
}
}    
}
else
{ 
// No radio button was selected.
//echo  "<p>Please select an option</p>"; 
?>
<script>
alert('Please Select an option');
window.location.href='homepage.php';
</script>
<?php
} 
} 
else
{ 
// The search box was left empty.
//echo  "<p>Please enter a search query</p>"; 
?>
<script>
alert('Please enter a search query');
window.location.href='homepage.php';
</script>
<?php
} 
}
?> 
</table>
</div>
</section>  
</body> 
</html>

上面的代码按文件名搜索完全正常,但按短语搜索的部分存在一些问题。

class.php

<?php require_once("/includes/pdf.php"); ?>
<?php
/**
 * Extracts plain text from a document so that its contents can be searched.
 *
 * Supported extensions (matched case-insensitively): doc, docx, xlsx, pptx, pdf.
 * convertToText() is the entry point; it returns the sentinel strings
 * "File Not exists" / "Invalid File Type" when the file cannot be handled.
 */
class DocxConversion{
    /** @var string Path of the document to convert. */
    private $filename;

    /**
     * @param string $filePath Path of the document to convert.
     */
    public function __construct($filePath) {
        $this->filename = $filePath;
    }

    /************************doc file************************************/
    /**
     * Best-effort text extraction from a legacy binary .doc file.
     *
     * The file is split on carriage returns; chunks containing a NUL byte are
     * treated as binary and dropped, the rest are joined with spaces and
     * stripped of every character outside a small printable whitelist.
     *
     * @return string Extracted text ("" when the file cannot be read).
     */
    private function read_doc() {
        // file_get_contents() opens, reads and closes in one call, so no handle leaks.
        $raw = file_get_contents($this->filename);
        if ($raw === false) {
            return '';
        }

        $outtext = '';
        foreach (explode(chr(0x0D), $raw) as $thisline) {
            if ($thisline === '' || strpos($thisline, chr(0x00)) !== false) {
                continue;
            }
            $outtext .= $thisline . ' ';
        }

        // Single-quoted so the backslash escapes reach PCRE intact; "/" must be
        // escaped because it is also the pattern delimiter.
        return preg_replace('/[^a-zA-Z0-9\s,.\-\n\r\t@\/_()]/', '', $outtext);
    }

    /************************docx file************************************/
    /**
     * Extracts the text of word/document.xml from a .docx archive.
     *
     * @return string|false Text with tags removed, "" when the archive has no
     *                      word/document.xml, false when the archive cannot be opened.
     */
    private function read_docx(){
        $zip = new ZipArchive;
        if ($zip->open($this->filename) !== true) {
            return false;
        }
        $content = $zip->getFromName('word/document.xml');
        $zip->close();
        if ($content === false) {
            return '';
        }

        // Table cell boundaries become spaces, paragraph ends become line breaks,
        // so words from neighbouring cells/paragraphs do not run together.
        $content = str_replace('</w:r></w:p></w:tc><w:tc>', " ", $content);
        $content = str_replace('</w:r></w:p>', "\r\n", $content);
        return strip_tags($content);
    }

    /************************PDF file************************************/
    /**
     * Extracts text from a PDF through the PDF2Text helper class.
     *
     * @return mixed Whatever PDF2Text::output() yields (the decoded text).
     */
    private function read_pdf(){
        $a = new PDF2Text();
        $a->setFilename($this->filename);
        $a->decodePDF();
        // Return the text instead of echoing it, so callers can actually search it
        // and it is not dumped into the page.
        return $a->output();
    }

    /************************excel sheet************************************/
    /**
     * Extracts the shared-strings text of an .xlsx workbook.
     *
     * @param string $input_file Path of the workbook.
     * @return string Text with tags removed, "" when the archive or the
     *                xl/sharedStrings.xml entry cannot be read.
     */
    public function xlsx_to_text($input_file){
        $xml_filename = "xl/sharedStrings.xml"; //content file name
        $zip_handle = new ZipArchive;
        $output_text = "";
        if (true !== $zip_handle->open($input_file)) {
            return $output_text;
        }
        if (($xml_index = $zip_handle->locateName($xml_filename)) !== false) {
            $output_text = $this->xml_to_text($zip_handle->getFromIndex($xml_index));
        }
        $zip_handle->close();
        return $output_text;
    }

    /*************************power point files*****************************/
    /**
     * Extracts the text of every consecutively numbered slide of a .pptx file.
     *
     * @param string $input_file Path of the presentation.
     * @return string Concatenated slide text, "" when nothing could be read.
     */
    public function pptx_to_text($input_file){
        $zip_handle = new ZipArchive;
        $output_text = "";
        if (true !== $zip_handle->open($input_file)) {
            return $output_text;
        }
        // Slides are stored as ppt/slides/slide1.xml, slide2.xml, ...; stop at the first gap.
        $slide_number = 1;
        while (($xml_index = $zip_handle->locateName("ppt/slides/slide".$slide_number.".xml")) !== false) {
            $output_text .= $this->xml_to_text($zip_handle->getFromIndex($xml_index));
            $slide_number++;
        }
        $zip_handle->close();
        return $output_text;
    }

    /**
     * Parses an XML string and returns its text content with all tags removed.
     *
     * LIBXML_NOENT is deliberately NOT used: the documents are user uploads, and
     * substituting external entities would allow XXE attacks. LIBXML_NONET also
     * forbids network access during parsing.
     *
     * @param string $xml Raw XML.
     * @return string Tag-free text.
     */
    private function xml_to_text($xml) {
        $xml_handle = new DOMDocument();
        $xml_handle->loadXML($xml, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING);
        return strip_tags($xml_handle->saveXML());
    }

    /**
     * Converts the document given to the constructor into plain text.
     *
     * @return mixed Extracted text; "File Not exists" when the path does not
     *               exist; "Invalid File Type" for unsupported extensions.
     */
    public function convertToText() {
        if(isset($this->filename) && !file_exists($this->filename)) {
            return "File Not exists";
        }

        // PATHINFO_EXTENSION yields "" (no notice) for names without an extension;
        // lower-casing makes "REPORT.DOCX" behave like "report.docx".
        $file_ext = strtolower(pathinfo($this->filename, PATHINFO_EXTENSION));
        switch ($file_ext) {
            case "doc":
                return $this->read_doc();
            case "docx":
                return $this->read_docx();
            case "xlsx":
                return $this->xlsx_to_text($this->filename);
            case "pptx":
                return $this->pptx_to_text($this->filename);
            case "pdf":
                return $this->read_pdf();
            default:
                return "Invalid File Type";
        }
    }
}
?>

上面的代码class.php将doc、docx、xlsx、pdf转换为文本。

pdf.php(http://pastebin.com/dvwySU1a):这个类将 PDF 文件转换为文本。

这部分是错误的(我认为):

fwrite($new_file,$Text);
echo $new_file."<br/>";
$handle = fopen($new_file, "r");

$new_file 中保存的是之前 fopen 返回的"文件指针或 FALSE",而不是文件名,所以不能把它再传给 fopen。此外,您也没有关闭这个 txt 文件(如果要在 fwrite 之后重新打开它,就应该先在 fwrite 之后调用 fclose)。

你为什么不在字符串中搜索这个短语呢。。为什么需要将其写入另一个txt文件?你可以像这里一样搜索文本

最后我自己得到了解决方案

search1.php

<?php require_once("/includes/functions.php"); ?>
<?php require_once("/includes/class.php"); ?>
<?php
// Connection settings for the MySQL server on localhost.
$dbhost = "localhost";
$dbuser = "root";
$dbpass = "sandeep";
$dbname = "dbtuts";

// Open the default connection; stop the request with a short message if that fails.
if (!mysql_connect($dbhost, $dbuser, $dbpass)) {
    die('cannot connect to the server');
}

// Select the working database on that connection, again stopping on failure.
if (!mysql_select_db($dbname)) {
    die('database selection problem');
}
?>
<!DOCTYPE html>
<html>
<head>
<title>SEARCHED FILES</title>
<link rel="stylesheet" href="assets/css/main.css" />
</head>
<body>
<section>   
<div class="table-wrapper">
<table class="alt">
<thead>
<tr>
<th>File Name</th>
<th>View</th>
</tr>
</thead>    
<?php
/*
 * Search request handler.
 *
 * Runs only when the search form was submitted. Depending on the selected mode it
 * either matches the search term against the stored file names ("name") or against
 * the extracted text of every stored file ("phrase", case-insensitive). Every hit is
 * printed as one table row; when there is nothing to show, a JavaScript alert is
 * emitted and the browser is sent back to homepage.php.
 *
 * Uses the default MySQL connection opened at the top of this script.
 */
if (isset($_POST['submit'])) {
    $name    = isset($_POST['name']) ? $_POST['name'] : '';
    $mode    = isset($_POST['search']) ? $_POST['search'] : '';
    $alert   = null;      // message for the alert box, null when results are shown
    $matches = array();   // file names to list

    if ($name == NULL) {
        $alert = 'Please enter a search query';
    } elseif (empty($mode)) {
        $alert = 'Please Select an option';
    } else {
        // Tables whose `file` column holds the names of the uploaded files.
        $tables = array("ada", "cdr", "others", "pdr", "rr", "sdd", "tbl_uploads");
        $byPhrase = ($mode == "phrase");
        // DIRECTORY_SEPARATOR keeps the path valid on both Windows and Unix hosts.
        $uploadDir = getcwd() . DIRECTORY_SEPARATOR . 'uploads' . DIRECTORY_SEPARATOR;
        // The term comes from the request, so it must be escaped before it is
        // placed inside the SQL string.
        $likeTerm = mysql_real_escape_string($name);

        foreach ($tables as $table) {
            $sql = $byPhrase
                ? "SELECT file FROM $table"
                : "SELECT file FROM $table WHERE (file LIKE '%$likeTerm%')";
            $result = mysql_query($sql);
            if (!$result) {
                // A failing table (e.g. missing) must not abort the whole search.
                continue;
            }

            while ($row = mysql_fetch_array($result)) {
                $file = $row['file'];
                if ($file == '') {
                    continue;
                }

                if ($byPhrase) {
                    $path = $uploadDir . $file;
                    if (!is_file($path)) {
                        continue;
                    }
                    $converter = new DocxConversion($path);
                    // Buffer output while converting: a converter that prints the
                    // text instead of returning it must neither pollute the page
                    // nor make the file unsearchable.
                    ob_start();
                    $text = (string) $converter->convertToText();
                    $echoed = ob_get_clean();
                    if ($text === '') {
                        $text = $echoed;
                    }
                    // "Invalid File Type" is the converter's sentinel for unsupported
                    // files, not document text. The text is searched in memory, so no
                    // temporary file shared between requests is needed.
                    if ($text === 'Invalid File Type' || stripos($text, $name) === false) {
                        continue;
                    }
                }

                $matches[] = $file;
            }
        }

        if (empty($matches)) {
            $alert = $byPhrase ? 'Phrase not Found' : 'Result Not Found!!';
        }
    }

    foreach ($matches as $file) {
        // File names are user-supplied uploads; escape them before putting them in HTML.
        $safe = htmlspecialchars($file, ENT_QUOTES, 'UTF-8');
?>
<tr>
<td><?php echo $safe ?></td>
<td><a href="uploads/<?php echo $safe ?>" target="_blank">view file</a></td>
</tr>
<?php
    }

    if ($alert !== null) {
?>
<script>
alert(<?php echo json_encode($alert) ?>);
window.location.href='homepage.php';
</script>
<?php
    }
}
?> 
</table>
</div>
</section>  
</body> 
</html>

最新更新