Skip to content

Extract PDF document properties and meta information in the browser using the PHP get_pdf_prop() function.

pdf_prop.php

<?php
/**
 * get_pdf_prop
 * Get the document properties (the "Info" dictionary) of a pdf file.
 *
 * How it works:
 *  1. read (at most) the last 16KB of the file;
 *  2. find the "xref" table and the "trailer" that precede "startxref";
 *  3. take the object number that follows "Info" in the trailer;
 *  4. look that object up in the cross-reference table to get its byte offset;
 *  5. read 1024 bytes at that offset and collect every "/Name (value)" entry
 *     of the first "<< ... >>" dictionary found there.
 *
 * Limitations: the "xref", "trailer" and "startxref" keywords must all be
 * inside the last 16KB; only the first 1024 bytes of the Info object are
 * inspected; only values written as "(...)" are returned, and a value is cut
 * at its first ")".
 *
 * @param string $file  path of the pdf file.
 * @return array|false  property name => raw value, or false when the file
 *                      cannot be opened or the Info object cannot be located.
 */
function get_pdf_prop($file) {
    $f = fopen($file, 'rb');
    if (!$f) {
        return false;
    }
    try {
        // read the last 16KB; a file shorter than that cannot be seeked so far
        // back from its end, so it is read from the beginning instead
        if (fseek($f, -16384, SEEK_END) !== 0) {
            rewind($f);
        }
        $tail = fread($f, 16384);
        if ($tail === false) {
            return false;
        }
        // extract cross-reference table and trailer
        if (!preg_match('/xref[\r\n]+(.*)trailer(.*)startxref/s', $tail, $m)) {
            return false;
        }
        $xref = $m[1]; // the cross reference table
        $trailer = $m[2]; // the trailer
        // extract Info object number
        if (!preg_match('/Info ([0-9]+) /', $trailer, $m)) {
            return false;
        }
        $object_no = (int) $m[1];
        // extract Info object offset: walk the table, where a "first count"
        // line opens a subsection and each following "offset generation n|f"
        // line describes the next object number of that subsection
        $offset = 0;
        $current = null; // object number of the next entry, null before any header
        foreach (preg_split('/[\r\n]+/', $xref) as $line) {
            if (preg_match('/^\s*([0-9]+)\s+[0-9]+\s*$/', $line, $m)) {
                $current = (int) $m[1];
                continue;
            }
            if ($current === null || !preg_match('/^\s*([0-9]+)\s+[0-9]+\s+([nf])/', $line, $m)) {
                continue; // not an entry line
            }
            if ($current === $object_no) {
                // an "f" (free) entry does not hold a usable file offset
                if ($m[2] === 'n') {
                    $offset = (int) $m[1];
                }
                break;
            }
            $current++;
        }
        if ($offset === 0) {
            return false;
        }
        // read information object
        fseek($f, $offset, SEEK_SET);
        $object = fread($f, 1024);
        // extract properties from the first dictionary of the object
        if ($object === false || !preg_match('/<<(.*)>>/Us', $object, $m)) {
            return false;
        }
        $n = preg_match_all('|/([a-z]+) ?\((.*)\)|Ui', $m[1], $entries);
        $prop = array();
        for ($i = 0; $i < $n; $i++) {
            $prop[$entries[1][$i]] = $entries[2][$i];
        }
        return $prop;
    } finally {
        // release the handle on every exit path, not only on success
        fclose($f);
    }
}
?>