Blame view
sources/apps/search_lucene/document/Pdf.php
1.84 KB
|
d1bafeea1
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
<?php
namespace OCA\Search_Lucene\Document;
use \OCP\Util;
/**
 * PDF document
 *
 * Lucene document built from a PDF: selected document properties and the
 * text extracted from the PDF are added as index fields. Instances are
 * created through {@see Pdf::loadPdf()}; the constructor is private.
 */
class Pdf extends \Zend_Search_Lucene_Document
{
    /**
     * Object constructor
     *
     * Parses $data with Zend_Pdf, indexes the Title/Author/Subject/Keywords
     * properties that are set, then indexes the extracted text as 'body'.
     *
     * Any exception raised while parsing or extracting is logged and
     * swallowed, so the resulting document may be empty or only partially
     * populated instead of the failure reaching the caller.
     *
     * @param string  $data         PDF data as a string
     * @param boolean $storeContent true to add the body as a Text field,
     *                              false to add it as an UnStored field
     */
    private function __construct($data, $storeContent)
    {
        try {
            $zendpdf = \Zend_Pdf::parse($data);

            // Store meta data properties.
            // PDF property name => index field name; order is the order in
            // which the fields are added to the document.
            $metaFields = array(
                'Title'    => 'title',
                'Author'   => 'author',
                'Subject'  => 'subject',
                'Keywords' => 'keywords',
            );
            foreach ($metaFields as $property => $fieldName) {
                if (isset($zendpdf->properties[$property])) {
                    $this->addField(
                        \Zend_Search_Lucene_Field::UnStored($fieldName, $zendpdf->properties[$property])
                    );
                }
            }

            //TODO handle PDF 1.6 metadata Zend_Pdf::getMetadata()

            // do the content extraction
            $pdfParse = new \App_Search_Helper_PdfParser();
            $body = $pdfParse->pdf2txt($zendpdf->render());

            // Loose comparison on purpose: it also skips null/false results,
            // which a strict !== '' check would pass on to addField().
            if ($body != '') {
                // Store contents
                if ($storeContent) {
                    $this->addField(\Zend_Search_Lucene_Field::Text('body', $body, 'UTF-8'));
                } else {
                    $this->addField(\Zend_Search_Lucene_Field::UnStored('body', $body, 'UTF-8'));
                }
            }
        } catch (\Exception $e) {
            // Best effort: a PDF that cannot be processed must not abort the
            // caller, so the failure is only logged.
            Util::writeLog(
                'search_lucene',
                $e->getMessage() . " Trace:\n" . $e->getTraceAsString(),
                Util::ERROR
            );
        }
    }

    /**
     * Load PDF document from a string
     *
     * @param string  $data         PDF data as a string
     * @param boolean $storeContent true to add the body as a Text field,
     *                              false (default) to add it as UnStored
     * @return Pdf
     */
    public static function loadPdf($data, $storeContent = false)
    {
        // The constructor takes exactly ($data, $storeContent). Passing a
        // literal false in second position forced $storeContent to false and
        // dropped the caller's value as an ignored third argument.
        return new self($data, $storeContent);
    }
}
|