diff --git a/lib/SimpleSAML/Utilities.php b/lib/SimpleSAML/Utilities.php
index 2a15674e2179213be41501b8b8cf16c8386d488d..f68ab1e5f2ce3498062950850d47048469fb01f6 100644
--- a/lib/SimpleSAML/Utilities.php
+++ b/lib/SimpleSAML/Utilities.php
@@ -2117,9 +2117,10 @@ class SimpleSAML_Utilities {
 	 *
 	 * @param string $path  The path or URL we should fetch.
 	 * @param array $context  Extra context options. This parameter is optional.
-	 * @return string  The data we fetched.
+	 * @param boolean $getHeaders  Whether to also return response headers. Optional.
+	 * @return mixed  array($data, $headers) if $getHeaders is set, the data string otherwise. $headers holds the status line at index 0 and the other headers keyed by lower-cased name; it is empty when no HTTP response headers are available.
 	 */
-	public static function fetch($path, $context = array()) {
+	public static function fetch($path, $context = array(), $getHeaders = FALSE) {
 		assert('is_string($path)');
 
 		$config = SimpleSAML_Configuration::getInstance();
@@ -2141,6 +2142,29 @@ class SimpleSAML_Utilities {
 			throw new SimpleSAML_Error_Exception('Error fetching ' . var_export($path, TRUE) . ':' . self::getLastError());
 		}
 
+		// Data and headers.
+		if ($getHeaders) {
+
+			$headers = array();
+
+			// PHP only populates $http_response_header for HTTP(S) URLs; for any
+			// other path (e.g. a local file) there are no headers to parse.
+			if (isset($http_response_header)) {
+				foreach($http_response_header as $h) {
+					if(preg_match('@^HTTP/1\.[01]\s+\d{3}\s+@', $h)) {
+						$headers = array(); // reset: a status line starts a new response, keep only the last one
+						$headers[0] = $h;
+						continue;
+					}
+					$bits = explode(':', $h, 2);
+					if(count($bits) === 2) {
+						$headers[strtolower($bits[0])] = trim($bits[1]);
+					}
+				}
+			}
+			return array($data, $headers);
+		}
+
 		return $data;
 	}
 
diff --git a/modules/metarefresh/config-templates/config-metarefresh.php b/modules/metarefresh/config-templates/config-metarefresh.php
index dc8b2c0b589c19f93887116fee3c61fd13f136b1..18bfd30f6a29f09c80ad53f7cca94ad73fa75618 100644
--- a/modules/metarefresh/config-templates/config-metarefresh.php
+++ b/modules/metarefresh/config-templates/config-metarefresh.php
@@ -9,6 +9,14 @@ $config = array(
 	#	'http://my.own.uni/idp'
 	#),
 
+	/*
+	 * Conditional GET requests
+	 * Efficient downloading so polling can be done more frequently.
+	 * Works for sources that send 'Last-Modified' or 'ETag' headers.
+	 * Note that the 'data' directory needs to be writable for this to work.
+	 */
+	#'conditionalGET' => TRUE,
+
 	'sets' => array(
 
 		'kalmar' => array(
@@ -16,7 +24,7 @@ $config = array(
 			'sources' => array(
 				array(
 					/*
-					 * entityIDs that should be excluded from this set.
+					 * entityIDs that should be excluded from this src.
 					 */
 					#'blacklist' => array(
 					#	'http://some.other.uni/idp',
@@ -30,8 +38,9 @@ $config = array(
 					#	'http://some.other.uni/idp',
 					#),
 
-					'src' => 'https://kalmar.feide.no/simplesaml/module.php/aggregator/?id=kalmarcentral&mimetype=text/plain&exclude=norway',
-					'validateFingerprint' => '591d4b4670463eeda91fcc816dc0af2a092aa801',
+					#'conditionalGET' => TRUE,
+					'src' => 'https://kalmar2.org/simplesaml/module.php/aggregator/?id=kalmarcentral&set=saml2&exclude=norway',
+					'validateFingerprint' => '59:1D:4B:46:70:46:3E:ED:A9:1F:CC:81:6D:C0:AF:2A:09:2A:A8:01',
 					'template' => array(
 						'tags' => array('kalmar'),
 						'authproc' => array(
diff --git a/modules/metarefresh/hooks/hook_cron.php b/modules/metarefresh/hooks/hook_cron.php
index 5cf828feef48b24d790a167c2c3a687d3f40ad9e..ce9be0c9d2372d5704bbb61ec3a239eb2306bdd6 100644
--- a/modules/metarefresh/hooks/hook_cron.php
+++ b/modules/metarefresh/hooks/hook_cron.php
@@ -16,6 +16,7 @@ function metarefresh_hook_cron(&$croninfo) {
 		$mconfig = SimpleSAML_Configuration::getOptionalConfig('config-metarefresh.php');
 
 		$sets = $mconfig->getConfigList('sets', array());
+		$stateFile = $config->getPathValue('datadir', 'data/') . 'metarefresh-state.php';
 
 		foreach ($sets AS $setkey => $set) {
 			// Only process sets where cron matches the current cron tag.
@@ -31,11 +32,21 @@ function metarefresh_hook_cron(&$croninfo) {
 				$expire = NULL;
 			}
 
-			$metaloader = new sspmod_metarefresh_MetaLoader($expire);
+			$outputDir = $set->getString('outputDir');
+			$outputDir = $config->resolvePath($outputDir);
+			$outputFormat = $set->getValueValidate('outputFormat', array('flatfile', 'serialize'), 'flatfile');
+
+			$oldMetadataSrc = SimpleSAML_Metadata_MetaDataStorageSource::getSource(array(
+				'type' => $outputFormat,
+				'directory' => $outputDir,
+			));
+
+			$metaloader = new sspmod_metarefresh_MetaLoader($expire, $stateFile, $oldMetadataSrc);
 
-			# Get global blacklist
+			# Get global blacklist, whitelist and caching info
 			$blacklist = $mconfig->getArray('blacklist', array());
 			$whitelist = $mconfig->getArray('whitelist', array());
+			$conditionalGET = $mconfig->getBoolean('conditionalGET', FALSE);
 
 			foreach($set->getArray('sources') AS $source) {
 
@@ -53,14 +64,18 @@ function metarefresh_hook_cron(&$croninfo) {
 					$source['whitelist'] = $whitelist;
 				}
 
+				# Let src specific conditionalGET override global one
+				if(!isset($source['conditionalGET'])) {
+					$source['conditionalGET'] = $conditionalGET;
+				}
+
 				SimpleSAML_Logger::debug('cron [metarefresh]: In set [' . $setkey . '] loading source [' . $source['src'] . ']');
 				$metaloader->loadSource($source);
 			}
 
-			$outputDir = $set->getString('outputDir');
-			$outputDir = $config->resolvePath($outputDir);
+			// Write state information back to disk
+			$metaloader->writeState();
 
-			$outputFormat = $set->getValueValidate('outputFormat', array('flatfile', 'serialize'), 'flatfile');
 			switch ($outputFormat) {
 				case 'flatfile':
 					$metaloader->writeMetadataFiles($outputDir);
diff --git a/modules/metarefresh/lib/MetaLoader.php b/modules/metarefresh/lib/MetaLoader.php
index 3d9cec5ff75f8247864da34bb4936f007d4755c8..c0041b1525ab5d5e7ee675ed5dbe3e3a2c2c58c4 100644
--- a/modules/metarefresh/lib/MetaLoader.php
+++ b/modules/metarefresh/lib/MetaLoader.php
@@ -7,8 +7,15 @@
 class sspmod_metarefresh_MetaLoader {
 
 
-	private $metadata;
 	private $expire;
+	private $metadata;
+	private $oldMetadataSrc;
+	private $stateFile;
+	private $state;
+	private $changed;
+	private static $types = array('saml20-idp-remote', 'saml20-sp-remote',
+		'shib13-idp-remote', 'shib13-sp-remote', 'attributeauthority-remote');
+
 
 	/**
 	 * Constructor
@@ -16,65 +23,175 @@ class sspmod_metarefresh_MetaLoader {
 	 * @param array $sources 	Sources...
 	 * @param 
 	 */
-	public function __construct($expire = NULL) {
-		$this->expire = $expire; 
+	public function __construct($expire = NULL, $stateFile = NULL, $oldMetadataSrc = NULL) {
+		$this->expire = $expire;
 		$this->metadata = array();
+		$this->oldMetadataSrc = $oldMetadataSrc;
+		$this->stateFile = $stateFile;
+		$this->changed = FALSE;
+
+		// Read file containing $state from disk
+		if($stateFile !== NULL && is_readable($stateFile)) {
+			require($stateFile);
+		}
+
+		$this->state = (isset($state)) ? $state : array();
+
 	}
 
 	/**
 	 * This function processes a SAML metadata file.
 	 *
-	 * @param $src  Filename of the metadata file.
+	 * @param array $source  Source configuration: 'src' plus optional 'conditionalGET', 'blacklist', 'whitelist', 'validateFingerprint' and 'template'.
 	 */
 	public function loadSource($source) {
-		
-		$entities = array();
+
+		$context = NULL;
+
+		$config = SimpleSAML_Configuration::getInstance();
+		$name = $config->getString('technicalcontact_name', NULL);
+		$mail = $config->getString('technicalcontact_email', NULL);
+		$rawheader = "User-Agent: SimpleSAMLphp metarefresh, run by $name <$mail>\r\n";
+
+		if (isset($source['conditionalGET']) && $source['conditionalGET']) {
+			if(array_key_exists($source['src'], $this->state)) {
+
+				$sourceState = $this->state[$source['src']];
+
+				if(isset($sourceState['last-modified'])) {
+					$rawheader .= 'If-Modified-Since: ' . $sourceState['last-modified'] . "\r\n";
+				}
+
+				if(isset($sourceState['etag'])) {
+					$rawheader .= 'If-None-Match: ' . $sourceState['etag'] . "\r\n";
+				}
+			}
+		}
+
+		// Build new HTTP context
+		$context = array('http' => array('header' => $rawheader));
+
+
+		// GET!
 		try {
-			$entities = SimpleSAML_Metadata_SAMLParser::parseDescriptorsFile($source['src']);
+			list($data, $responseHeaders) = SimpleSAML_Utilities::fetch($source['src'], $context, TRUE);
 		} catch(Exception $e) {
 			SimpleSAML_Logger::warning('metarefresh: Failed to retrieve metadata. ' . $e->getMessage());
+			// Nothing was fetched, so $data and $responseHeaders are undefined: skip this source.
+			return;
 		}
 
-		foreach($entities as $entity) {
+		//SimpleSAML_Logger::debug('All response headers: ' . var_export($responseHeaders,1));
+		// Index 0 holds the HTTP status line; it is missing when no HTTP response headers were returned.
+		$status = isset($responseHeaders[0]) ? $responseHeaders[0] : '';
 
-			if(isset($source['blacklist'])) {
-				if(!empty($source['blacklist']) && in_array($entity->getEntityID(), $source['blacklist'])) {
-					SimpleSAML_Logger::info('Skipping "' .  $entity->getEntityID() . '" - blacklisted.' . "\n");
-					continue;
+		if(preg_match('@^HTTP/1\.[01]\s304\s@', $status ) && isset($this->oldMetadataSrc)) {
+			// Not-Modified. This could only have happened if 'conditionalGET' was used.
+			SimpleSAML_Logger::debug('Received \'' . $status . '\', re-using cached metadata');
+
+			foreach(self::$types as $type) {
+				foreach($this->oldMetadataSrc->getMetadataSet($type) as $entity) {
+					if(array_key_exists('metarefresh:src', $entity)) {
+						if($entity['metarefresh:src'] == $source['src']) {
+							//SimpleSAML_Logger::debug('Re-using cached metadata for ' . $entity['entityid']);
+							$this->addMetadata($source['src'], $entity, $type);
+						}
+					}
 				}
 			}
+		} else {
 
-			if(isset($source['whitelist'])) {
-				if(!empty($source['whitelist']) && !in_array($entity->getEntityID(), $source['whitelist'])) {
-					SimpleSAML_Logger::info('Skipping "' .  $entity->getEntityID() . '" - not in the whitelist.' . "\n");
-					continue;
+			// Stale or no metadata, so a fresh copy
+			if (isset($source['conditionalGET']) && $source['conditionalGET']) {
+				SimpleSAML_Logger::debug('Downloaded fresh copy');
+			}
+
+			$entities = array();
+			try{
+				$doc = new DOMDocument();
+				$res = $doc->loadXML($data);
+				if($res !== TRUE) {
+					throw new Exception('Failed to read XML from ' . $source['src']);
 				}
+				if($doc->documentElement === NULL) throw new Exception('Opened file is not an XML document: ' . $source['src']);
+				$entities = SimpleSAML_Metadata_SAMLParser::parseDescriptorsElement($doc->documentElement);
+			} catch(Exception $e) {
+				SimpleSAML_Logger::warning('metarefresh: Failed to retrieve metadata. ' . $e->getMessage());
 			}
 
-			if(array_key_exists('validateFingerprint', $source) && $source['validateFingerprint'] !== NULL) {
-				if(!$entity->validateFingerprint($source['validateFingerprint'])) {
-					SimpleSAML_Logger::info('Skipping "' . $entity->getEntityId() . '" - could not verify signature.' . "\n");
-					continue;
+			foreach($entities as $entity) {
+
+				if(isset($source['blacklist'])) {
+					if(!empty($source['blacklist']) && in_array($entity->getEntityID(), $source['blacklist'])) {
+						SimpleSAML_Logger::info('Skipping "' . $entity->getEntityID() . '" - blacklisted.' . "\n");
+						continue;
+					}
+				}
+
+				if(isset($source['whitelist'])) {
+					if(!empty($source['whitelist']) && !in_array($entity->getEntityID(), $source['whitelist'])) {
+						SimpleSAML_Logger::info('Skipping "' . $entity->getEntityID() . '" - not in the whitelist.' . "\n");
+						continue;
+					}
+				}
+
+				if(array_key_exists('validateFingerprint', $source) && $source['validateFingerprint'] !== NULL) {
+					if(!$entity->validateFingerprint($source['validateFingerprint'])) {
+						SimpleSAML_Logger::info('Skipping "' . $entity->getEntityId() . '" - could not verify signature.' . "\n");
+						continue;
+					}
+				}
+
+				$template = NULL;
+				if (array_key_exists('template', $source)) $template = $source['template'];
+
+				$this->addMetadata($source['src'], $entity->getMetadata1xSP(), 'shib13-sp-remote', $template);
+				$this->addMetadata($source['src'], $entity->getMetadata1xIdP(), 'shib13-idp-remote', $template);
+				$this->addMetadata($source['src'], $entity->getMetadata20SP(), 'saml20-sp-remote', $template);
+				$this->addMetadata($source['src'], $entity->getMetadata20IdP(), 'saml20-idp-remote', $template);
+				$attributeAuthorities = $entity->getAttributeAuthorities();
+				if (!empty($attributeAuthorities)) {
+					$this->addMetadata($source['src'], $attributeAuthorities[0], 'attributeauthority-remote', $template);
 				}
 			}
-			
-			$template = NULL;
-			if (array_key_exists('template', $source)) $template = $source['template'];
-
-			$this->addMetadata($source['src'], $entity->getMetadata1xSP(), 'shib13-sp-remote', $template);
-			$this->addMetadata($source['src'], $entity->getMetadata1xIdP(), 'shib13-idp-remote', $template);
-			$this->addMetadata($source['src'], $entity->getMetadata20SP(), 'saml20-sp-remote', $template);
-			$this->addMetadata($source['src'], $entity->getMetadata20IdP(), 'saml20-idp-remote', $template);
-			$attributeAuthorities = $entity->getAttributeAuthorities();
-			if (!empty($attributeAuthorities)) {
-				$this->addMetadata($source['src'], $attributeAuthorities[0], 'attributeauthority-remote', $template);
+		}
+
+		// Save state for this src
+		if (isset($source['conditionalGET']) && $source['conditionalGET']) {
+
+			// Headers section
+			$candidates = array('last-modified', 'etag');
+
+			foreach($candidates as $candidate) {
+				if(array_key_exists($candidate, $responseHeaders)) {
+					$this->state[$source['src']][$candidate] = $responseHeaders[$candidate];
+				}
 			}
+			if(!empty($this->state[$source['src']])) {
+				// Timestamp when this src was requested.
+				$this->state[$source['src']]['requested_at'] = $this->getTime();
+
+				$this->changed = TRUE;
+			}
 		}
 	}
 
+	/**
+	 * This function writes the state array back to disk (only when it changed and a state file is configured).
+	 */
+	public function writeState() {
+		if($this->changed && $this->stateFile !== NULL) {
+			SimpleSAML_Logger::debug('Writing: ' . $this->stateFile);
+			SimpleSAML_Utilities::writeFile(
+				$this->stateFile,
+				"<?php\n/* This file was generated by the metarefresh module at ".$this->getTime() . ".\n".
+				" Do not update it manually as it will get overwritten. */\n".
+				'$state = ' . var_export($this->state, TRUE) . ";\n?>\n"
+			);
+		}
+	}
 
-
 
 	/**
 	 * This function writes the metadata to stdout.
 	 */
@@ -126,6 +243,7 @@ class sspmod_metarefresh_MetaLoader {
 			$metadata = array_merge($metadata, $template);
 		}
 
+		$metadata['metarefresh:src'] = $filename;
 		if(!array_key_exists($type, $this->metadata)) {
 			$this->metadata[$type] = array();
 		}
@@ -138,11 +256,11 @@ class sspmod_metarefresh_MetaLoader {
 
 				// Override metadata expire with more restrictive global config-
 				if ($this->expire < $metadata['expire'])
-					$metadata['expire'] = $this->expire;	
+					$metadata['expire'] = $this->expire;
 
 			// If expire is not already in metadata use global config
 			} else {
-				$metadata['expire'] = $this->expire;	
+				$metadata['expire'] = $this->expire;
 			}
 		}
 
@@ -201,34 +319,33 @@ class sspmod_metarefresh_MetaLoader {
 			}
 		}
 
-		foreach($this->metadata as $category => $elements) {
-
-			$filename = $outputDir . '/' . $category . '.php';
-
-			SimpleSAML_Logger::debug('Writing: ' . $filename . "\n");
-
-			$fh = @fopen($filename, 'w');
-			if($fh === FALSE) {
-				throw new Exception('Failed to open file for writing: ' . $filename . "\n");
-				exit(1);
-			}
-
-			fwrite($fh, '<?php' . "\n");
-
-			foreach($elements as $m) {
-				$filename = $m['filename'];
-				$entityID = $m['metadata']['entityid'];
-
-				fwrite($fh, "\n");
-				fwrite($fh, '/* The following metadata was generated from ' . $filename . ' on ' . $this->getTime() . '. */' . "\n");
-				fwrite($fh, '$metadata[\'' . addslashes($entityID) . '\'] = ' . var_export($m['metadata'], TRUE) . ';' . "\n");
+		foreach(self::$types as $type) {
+
+			$filename = $outputDir . '/' . $type . '.php';
+
+			if(array_key_exists($type, $this->metadata)) {
+				$elements = $this->metadata[$type];
+				SimpleSAML_Logger::debug('Writing: ' . $filename);
+
+				$content = '<?php' . "\n" . '/* This file was generated by the metarefresh module at '. $this->getTime() . "\n";
+				$content .= ' Do not update it manually as it will get overwritten' . "\n" . '*/' . "\n";
+
+				foreach($elements as $m) {
+					$entityID = $m['metadata']['entityid'];
+					$content .= "\n";
+					$content .= '$metadata[\'' . addslashes($entityID) . '\'] = ' . var_export($m['metadata'], TRUE) . ';' . "\n";
+				}
+
+				$content .= "\n" . '?>';
+
+				SimpleSAML_Utilities::writeFile($filename, $content);
+			} elseif(is_file($filename)) {
+				if(unlink($filename)) {
+					SimpleSAML_Logger::debug('Deleting stale metadata file: ' . $filename);
+				} else {
+					SimpleSAML_Logger::warning('Could not delete stale metadata file: ' . $filename);
+				}
 			}
-
-
-			fwrite($fh, "\n");
-			fwrite($fh, '?>');
-
-			fclose($fh);
 		}
 	}
 