Skip to content
Snippets Groups Projects
Commit 3a08025d authored by Olav Morken's avatar Olav Morken
Browse files

Add support for conditional get of metadata files.

This patch adds support for only updating metadata files that have
changed on the server. This reduces bandwidth used, and also allows
us to skip metadata parsing, which speeds things up significantly.

Thanks to Dyonisius Visser for implementing this!

git-svn-id: https://simplesamlphp.googlecode.com/svn/trunk@2980 44740490-163a-0410-bde0-09ae8108e29a
parent 5ac62a5b
No related branches found
No related tags found
No related merge requests found
......@@ -2117,9 +2117,10 @@ class SimpleSAML_Utilities {
*
* @param string $path The path or URL we should fetch.
* @param array $context Extra context options. This parameter is optional.
* @return string The data we fetched.
* @param boolean $getHeaders Whether to also return response headers. Optional.
* @return mixed array($data, $headers) if $getHeaders is TRUE, otherwise the fetched data as a string.
*/
public static function fetch($path, $context = array()) {
public static function fetch($path, $context = array(), $getHeaders = FALSE) {
assert('is_string($path)');
$config = SimpleSAML_Configuration::getInstance();
......@@ -2141,6 +2142,25 @@ class SimpleSAML_Utilities {
throw new SimpleSAML_Error_Exception('Error fetching ' . var_export($path, TRUE) . ':' . self::getLastError());
}
// Data and headers.
if ($getHeaders) {
$headers = array();
foreach($http_response_header as $h) {
if(preg_match('@^HTTP/1\.[01]\s+\d{3}\s+@', $h)) {
$headers = array(); // reset
$headers[0] = $h;
continue;
}
$bits = explode(':', $h, 2);
if(count($bits) === 2) {
$headers[strtolower($bits[0])] = trim($bits[1]);
}
}
return array($data, $headers);
}
return $data;
}
......
......@@ -9,6 +9,14 @@ $config = array(
# 'http://my.own.uni/idp'
#),
/*
* Conditional GET requests
* Efficient downloading so polling can be done more frequently.
* Works for sources that send 'Last-Modified' or 'ETag' headers.
* Note that the 'data' directory needs to be writable for this to work.
*/
#'conditionalGET' => TRUE,
'sets' => array(
'kalmar' => array(
......@@ -16,7 +24,7 @@ $config = array(
'sources' => array(
array(
/*
* entityIDs that should be excluded from this set.
* entityIDs that should be excluded from this src.
*/
#'blacklist' => array(
# 'http://some.other.uni/idp',
......@@ -30,8 +38,9 @@ $config = array(
# 'http://some.other.uni/idp',
#),
'src' => 'https://kalmar.feide.no/simplesaml/module.php/aggregator/?id=kalmarcentral&mimetype=text/plain&exclude=norway',
'validateFingerprint' => '591d4b4670463eeda91fcc816dc0af2a092aa801',
#'conditionalGET' => TRUE,
'src' => 'https://kalmar2.org/simplesaml/module.php/aggregator/?id=kalmarcentral&set=saml2&exclude=norway',
'validateFingerprint' => '59:1D:4B:46:70:46:3E:ED:A9:1F:CC:81:6D:C0:AF:2A:09:2A:A8:01',
'template' => array(
'tags' => array('kalmar'),
'authproc' => array(
......
......@@ -16,6 +16,7 @@ function metarefresh_hook_cron(&$croninfo) {
$mconfig = SimpleSAML_Configuration::getOptionalConfig('config-metarefresh.php');
$sets = $mconfig->getConfigList('sets', array());
$stateFile = $config->getPathValue('datadir', 'data/') . 'metarefresh-state.php';
foreach ($sets AS $setkey => $set) {
// Only process sets where cron matches the current cron tag.
......@@ -31,11 +32,21 @@ function metarefresh_hook_cron(&$croninfo) {
$expire = NULL;
}
$metaloader = new sspmod_metarefresh_MetaLoader($expire);
$outputDir = $set->getString('outputDir');
$outputDir = $config->resolvePath($outputDir);
$outputFormat = $set->getValueValidate('outputFormat', array('flatfile', 'serialize'), 'flatfile');
$oldMetadataSrc = SimpleSAML_Metadata_MetaDataStorageSource::getSource(array(
'type' => $outputFormat,
'directory' => $outputDir,
));
$metaloader = new sspmod_metarefresh_MetaLoader($expire, $stateFile, $oldMetadataSrc);
# Get global blacklist
# Get global blacklist, whitelist and caching info
$blacklist = $mconfig->getArray('blacklist', array());
$whitelist = $mconfig->getArray('whitelist', array());
$conditionalGET = $mconfig->getBoolean('conditionalGET', FALSE);
foreach($set->getArray('sources') AS $source) {
......@@ -53,14 +64,18 @@ function metarefresh_hook_cron(&$croninfo) {
$source['whitelist'] = $whitelist;
}
# Let a source-specific conditionalGET setting override the global one
if(!isset($source['conditionalGET'])) {
$source['conditionalGET'] = $conditionalGET;
}
SimpleSAML_Logger::debug('cron [metarefresh]: In set [' . $setkey . '] loading source [' . $source['src'] . ']');
$metaloader->loadSource($source);
}
$outputDir = $set->getString('outputDir');
$outputDir = $config->resolvePath($outputDir);
// Write state information back to disk
$metaloader->writeState();
$outputFormat = $set->getValueValidate('outputFormat', array('flatfile', 'serialize'), 'flatfile');
switch ($outputFormat) {
case 'flatfile':
$metaloader->writeMetadataFiles($outputDir);
......
......@@ -7,8 +7,14 @@
class sspmod_metarefresh_MetaLoader {
private $metadata;
private $expire;
private $metadata;
private $oldMetadataSrc;
private $stateFile;
private $changed;
private static $types = array('saml20-idp-remote', 'saml20-sp-remote',
'shib13-idp-remote', 'shib13-sp-remote', 'attributeauthority-remote');
/**
* Constructor
......@@ -16,65 +22,172 @@ class sspmod_metarefresh_MetaLoader {
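* @param $expire Expire value stored on the loader and applied to loaded metadata.
* @param $stateFile Path of the PHP file holding the $state array; it is read here if readable.
* @param $oldMetadataSrc Source of previously written metadata, re-used when a server answers 304.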
*/
public function __construct($expire = NULL) {
$this->expire = $expire;
public function __construct($expire = NULL, $stateFile = NULL, $oldMetadataSrc = NULL) {
$this->expire = $expire;
$this->metadata = array();
$this->oldMetadataSrc = $oldMetadataSrc;
$this->stateFile = $stateFile;
$this->changed = FALSE;
// Read file containing $state from disk
if(is_readable($stateFile)) {
require($stateFile);
}
$this->state = (isset($state)) ? $state : array();
}
/**
* This function processes a SAML metadata file.
*
* @param $src Filename of the metadata file.
* @param $source
*/
public function loadSource($source) {
$entities = array();
$context = NULL;
$config = SimpleSAML_Configuration::getInstance();
$name = $config->getString('technicalcontact_name', NULL);
$mail = $config->getString('technicalcontact_email', NULL);
$rawheader = "User-Agent: SimpleSAMLphp metarefresh, run by $name <$mail>\r\n";
if (isset($source['conditionalGET']) && $source['conditionalGET']) {
if(array_key_exists($source['src'], $this->state)) {
$sourceState = $this->state[$source['src']];
if(isset($sourceState['last-modified'])) {
$rawheader .= 'If-Modified-Since: ' . $sourceState['last-modified'] . "\r\n";
}
if(isset($sourceState['etag'])) {
$rawheader .= 'If-None-Match: ' . $sourceState['etag'] . "\r\n";
}
}
}
// Build new HTTP context
$context = array('http' => array('header' => $rawheader));
// GET!
try {
$entities = SimpleSAML_Metadata_SAMLParser::parseDescriptorsFile($source['src']);
list($data, $responseHeaders) = SimpleSAML_Utilities::fetch($source['src'], $context, TRUE);
} catch(Exception $e) {
SimpleSAML_Logger::warning('metarefresh: Failed to retrieve metadata. ' . $e->getMessage());
}
foreach($entities as $entity) {
//SimpleSAML_Logger::debug('All response headers: ' . var_export($responseHeaders,1));
$status = $responseHeaders[0];
if(isset($source['blacklist'])) {
if(!empty($source['blacklist']) && in_array($entity->getEntityID(), $source['blacklist'])) {
SimpleSAML_Logger::info('Skipping "' . $entity->getEntityID() . '" - blacklisted.' . "\n");
continue;
if(preg_match('@^HTTP/1\.[01]\s304\s@', $status ) && isset($this->oldMetadataSrc)) {
// Not-Modified. This could only have happened if 'conditionalGET' was used.
SimpleSAML_Logger::debug('Received \'' . $status . '\', re-using cached metadata');
foreach(self::$types as $type) {
foreach($this->oldMetadataSrc->getMetadataSet($type) as $entity) {
if(array_key_exists('metarefresh:src', $entity)) {
if($entity['metarefresh:src'] == $source['src']) {
//SimpleSAML_Logger::debug('Re-using cached metadata for ' . $entity['entityid']);
$this->addMetadata($source['src'], $entity, $type);
}
}
}
}
} else {
if(isset($source['whitelist'])) {
if(!empty($source['whitelist']) && !in_array($entity->getEntityID(), $source['whitelist'])) {
SimpleSAML_Logger::info('Skipping "' . $entity->getEntityID() . '" - not in the whitelist.' . "\n");
continue;
// Not a 304 response (or no old metadata source available), so parse the freshly downloaded copy
if (isset($source['conditionalGET']) && $source['conditionalGET']) {
SimpleSAML_Logger::debug('Downloaded fresh copy');
}
$entities = array();
try{
$doc = new DOMDocument();
$res = $doc->loadXML($data);
if($res !== TRUE) {
throw new Exception('Failed to read XML from ' . $source['src']);
}
if($doc->documentElement === NULL) throw new Exception('Opened file is not an XML document: ' . $source['src']);
$entities = SimpleSAML_Metadata_SAMLParser::parseDescriptorsElement($doc->documentElement);
} catch(Exception $e) {
SimpleSAML_Logger::warning('metarefresh: Failed to retrieve metadata. ' . $e->getMessage());
}
if(array_key_exists('validateFingerprint', $source) && $source['validateFingerprint'] !== NULL) {
if(!$entity->validateFingerprint($source['validateFingerprint'])) {
SimpleSAML_Logger::info('Skipping "' . $entity->getEntityId() . '" - could not verify signature.' . "\n");
continue;
foreach($entities as $entity) {
if(isset($source['blacklist'])) {
if(!empty($source['blacklist']) && in_array($entity->getEntityID(), $source['blacklist'])) {
SimpleSAML_Logger::info('Skipping "' . $entity->getEntityID() . '" - blacklisted.' . "\n");
continue;
}
}
if(isset($source['whitelist'])) {
if(!empty($source['whitelist']) && !in_array($entity->getEntityID(), $source['whitelist'])) {
SimpleSAML_Logger::info('Skipping "' . $entity->getEntityID() . '" - not in the whitelist.' . "\n");
continue;
}
}
if(array_key_exists('validateFingerprint', $source) && $source['validateFingerprint'] !== NULL) {
if(!$entity->validateFingerprint($source['validateFingerprint'])) {
SimpleSAML_Logger::info('Skipping "' . $entity->getEntityId() . '" - could not verify signature.' . "\n");
continue;
}
}
$template = NULL;
if (array_key_exists('template', $source)) $template = $source['template'];
$this->addMetadata($source['src'], $entity->getMetadata1xSP(), 'shib13-sp-remote', $template);
$this->addMetadata($source['src'], $entity->getMetadata1xIdP(), 'shib13-idp-remote', $template);
$this->addMetadata($source['src'], $entity->getMetadata20SP(), 'saml20-sp-remote', $template);
$this->addMetadata($source['src'], $entity->getMetadata20IdP(), 'saml20-idp-remote', $template);
$attributeAuthorities = $entity->getAttributeAuthorities();
if (!empty($attributeAuthorities)) {
$this->addMetadata($source['src'], $attributeAuthorities[0], 'attributeauthority-remote', $template);
}
}
$template = NULL;
if (array_key_exists('template', $source)) $template = $source['template'];
$this->addMetadata($source['src'], $entity->getMetadata1xSP(), 'shib13-sp-remote', $template);
$this->addMetadata($source['src'], $entity->getMetadata1xIdP(), 'shib13-idp-remote', $template);
$this->addMetadata($source['src'], $entity->getMetadata20SP(), 'saml20-sp-remote', $template);
$this->addMetadata($source['src'], $entity->getMetadata20IdP(), 'saml20-idp-remote', $template);
$attributeAuthorities = $entity->getAttributeAuthorities();
if (!empty($attributeAuthorities)) {
$this->addMetadata($source['src'], $attributeAuthorities[0], 'attributeauthority-remote', $template);
}
// Save state for this src
if (isset($source['conditionalGET']) && $source['conditionalGET']) {
// Headers section
$candidates = array('last-modified', 'etag');
foreach($candidates as $candidate) {
if(array_key_exists($candidate, $responseHeaders)) {
$this->state[$source['src']][$candidate] = $responseHeaders[$candidate];
}
}
if(!empty($this->state[$source['src']])) {
// Timestamp when this src was requested.
$this->state[$source['src']]['requested_at'] = $this->getTime();
$this->changed = TRUE;
}
}
}
/**
 * Persist the conditional-GET state array to the state file.
 *
 * Nothing is written unless the state was flagged as changed while
 * sources were loaded. The output is a PHP file that assigns the
 * array to $state, which is the variable the constructor looks for
 * when it reads the file back.
 */
public function writeState() {
    // Unchanged state: leave the file on disk untouched.
    if (!$this->changed) {
        return;
    }

    SimpleSAML_Logger::debug('Writing: ' . $this->stateFile);

    // Assemble the generated PHP source: header comment, then the exported array.
    $generatedAt = $this->getTime();
    $contents = "<?php\n/* This file was generated by the metarefresh module at " . $generatedAt . ".\n"
        . " Do not update it manually as it will get overwritten. */\n"
        . '$state = ' . var_export($this->state, TRUE) . ";\n?>\n";

    SimpleSAML_Utilities::writeFile($this->stateFile, $contents);
}
/**
* This function writes the metadata to stdout.
*/
......@@ -126,6 +239,7 @@ class sspmod_metarefresh_MetaLoader {
$metadata = array_merge($metadata, $template);
}
$metadata['metarefresh:src'] = $filename;
if(!array_key_exists($type, $this->metadata)) {
$this->metadata[$type] = array();
}
......@@ -138,11 +252,11 @@ class sspmod_metarefresh_MetaLoader {
// Override metadata expire with the more restrictive global config
if ($this->expire < $metadata['expire'])
$metadata['expire'] = $this->expire;
$metadata['expire'] = $this->expire;
// If expire is not already in metadata use global config
} else {
$metadata['expire'] = $this->expire;
$metadata['expire'] = $this->expire;
}
}
......@@ -201,34 +315,33 @@ class sspmod_metarefresh_MetaLoader {
}
}
foreach($this->metadata as $category => $elements) {
$filename = $outputDir . '/' . $category . '.php';
SimpleSAML_Logger::debug('Writing: ' . $filename . "\n");
$fh = @fopen($filename, 'w');
if($fh === FALSE) {
throw new Exception('Failed to open file for writing: ' . $filename . "\n");
exit(1);
}
fwrite($fh, '<?php' . "\n");
foreach($elements as $m) {
$filename = $m['filename'];
$entityID = $m['metadata']['entityid'];
fwrite($fh, "\n");
fwrite($fh, '/* The following metadata was generated from ' . $filename . ' on ' . $this->getTime() . '. */' . "\n");
fwrite($fh, '$metadata[\'' . addslashes($entityID) . '\'] = ' . var_export($m['metadata'], TRUE) . ';' . "\n");
foreach(self::$types as $type) {
$filename = $outputDir . '/' . $type . '.php';
if(array_key_exists($type, $this->metadata)) {
$elements = $this->metadata[$type];
SimpleSAML_Logger::debug('Writing: ' . $filename);
$content = '<?php' . "\n" . '/* This file was generated by the metarefresh module at '. $this->getTime() . "\n";
$content .= ' Do not update it manually as it will get overwritten' . "\n" . '*/' . "\n";
foreach($elements as $m) {
$entityID = $m['metadata']['entityid'];
$content .= "\n";
$content .= '$metadata[\'' . addslashes($entityID) . '\'] = ' . var_export($m['metadata'], TRUE) . ';' . "\n";
}
$content .= "\n" . '?>';
SimpleSAML_Utilities::writeFile($filename, $content);
} elseif(is_file($filename)) {
if(unlink($filename)) {
SimpleSAML_Logger::debug('Deleting stale metadata file: ' . $filename);
} else {
SimpleSAML_Logger::warning('Could not delete stale metadata file: ' . $filename);
}
}
fwrite($fh, "\n");
fwrite($fh, '?>');
fclose($fh);
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment