<?php
/**
 * Utility class for XML and DOM manipulation.
 *
 * @package SimpleSAMLphp
 */

namespace SimpleSAML\Utils;

class XML
{

    /**
     * This function performs some sanity checks on XML documents, and optionally validates them against their schema
     * if the 'debug.validatexml' option is enabled. A warning will be printed to the log if validation fails.
     *
     * @param string $message The SAML document we want to check.
     * @param string $type The type of document. Can be one of:
     * - 'saml20'
     * - 'saml11'
     * - 'saml-meta'
     *
     * @throws \InvalidArgumentException If $message is not a string or $type is not a string containing one of the
     *     values allowed.
     * @throws \SimpleSAML_Error_Exception If $message contains a doctype declaration.
     *
     * @author Olav Morken, UNINETT AS <olav.morken@uninett.no>
     * @author Jaime Perez, UNINETT AS <jaime.perez@uninett.no>
     */
    public static function checkSAMLMessage($message, $type)
    {
        $allowed_types = array('saml20', 'saml11', 'saml-meta');
        // strict comparison: a loose in_array() would accept values such as boolean true as a valid type
        if (!(is_string($message) && in_array($type, $allowed_types, true))) {
            throw new \InvalidArgumentException('Invalid input parameters.');
        }

        // a SAML message should not contain a doctype-declaration
        if (strpos($message, '<!DOCTYPE') !== false) {
            throw new \SimpleSAML_Error_Exception('XML contained a doctype declaration.');
        }

        $enabled = \SimpleSAML_Configuration::getInstance()->getBoolean('debug.validatexml', null);
        if (!$enabled) {
            // schema validation is not enabled, the sanity checks above are all we do
            return;
        }

        $result = true;
        switch ($type) {
            case 'saml11':
                $result = self::isValid($message, 'oasis-sstc-saml-schema-protocol-1.1.xsd');
                break;
            case 'saml20':
                $result = self::isValid($message, 'saml-schema-protocol-2.0.xsd');
                break;
            case 'saml-meta':
                $result = self::isValid($message, 'saml-schema-metadata-2.0.xsd');
                break;
        }

        // isValid() returns either true or a (truthy) error string, hence the strict comparison
        if ($result !== true) {
            \SimpleSAML_Logger::warning($result);
        }
    }


    /**
     * Helper function to log SAML messages that we send or receive.
     *
     * @param string|\DOMElement $message The message, as a string containing the XML or an XML element.
     * @param string $type Whether this message is sent or received, encrypted or decrypted. The following
     *     values are supported:
     *      - 'in': for messages received.
     *      - 'out': for outgoing messages.
     *      - 'decrypt': for decrypted messages.
     *      - 'encrypt': for encrypted messages.
     *
     * @throws \SimpleSAML_Error_Exception If $type is not a string or $message is neither a string nor a \DOMElement.
     *
     * @author Olav Morken, UNINETT AS <olav.morken@uninett.no>
     */
    public static function debugSAMLMessage($message, $type)
    {
        if (!(is_string($type) && (is_string($message) || $message instanceof \DOMElement))) {
            throw new \SimpleSAML_Error_Exception('Invalid input parameters.');
        }

        $globalConfig = \SimpleSAML_Configuration::getInstance();
        if (!$globalConfig->getBoolean('debug', false)) {
            // message debug disabled
            return;
        }

        if ($message instanceof \DOMElement) {
            // serialize the element so that it can be handled as a string from here on
            $message = $message->ownerDocument->saveXML($message);
        }

        switch ($type) {
            case 'in':
                \SimpleSAML_Logger::debug('Received message:');
                break;
            case 'out':
                \SimpleSAML_Logger::debug('Sending message:');
                break;
            case 'decrypt':
                \SimpleSAML_Logger::debug('Decrypted message:');
                break;
            case 'encrypt':
                \SimpleSAML_Logger::debug('Encrypted message:');
                break;
            default:
                assert(false);
        }

        // log the formatted message one line at a time
        $str = self::formatXMLString($message);
        foreach (explode("\n", $str) as $line) {
            \SimpleSAML_Logger::debug($line);
        }
    }


    /**
     * Format a DOM element.
     *
     * This function takes in a DOM element, and inserts whitespace to make it more readable. Note that whitespace
     * added previously will be removed.
     *
     * Elements that contain both text and child nodes, or any node that is not text, a comment or an element, are
     * left untouched.
     *
     * @param \DOMElement $root The root element which should be formatted.
     * @param string $indentBase The indentation this element should be assumed to have. Defaults to an empty
     *     string.
     *
     * @throws \SimpleSAML_Error_Exception If $root is not a DOMElement or $indentBase is not a string.
     *
     * @author Olav Morken, UNINETT AS <olav.morken@uninett.no>
     */
    public static function formatDOMElement(\DOMElement $root, $indentBase = '')
    {
        if (!is_string($indentBase)) {
            throw new \SimpleSAML_Error_Exception('Invalid input parameters');
        }

        // check what this element contains
        $fullText = ''; // all text in this element
        $textNodes = array(); // text nodes which should be deleted
        $childNodes = array(); // other child nodes
        for ($i = 0; $i < $root->childNodes->length; $i++) {
            $child = $root->childNodes->item($i);

            if ($child instanceof \DOMText) {
                $textNodes[] = $child;
                $fullText .= $child->wholeText;
            } elseif ($child instanceof \DOMComment || $child instanceof \DOMElement) {
                $childNodes[] = $child;
            } else {
                // unknown node type. We don't know how to format this
                return;
            }
        }

        $fullText = trim($fullText);
        if (strlen($fullText) > 0) {
            // we contain text
            $hasText = true;
        } else {
            $hasText = false;
        }

        $hasChildNode = (count($childNodes) > 0);

        if ($hasText && $hasChildNode) {
            // element contains both text and child nodes - we don't know how to format this one
            return;
        }

        // remove text nodes
        foreach ($textNodes as $node) {
            $root->removeChild($node);
        }

        if ($hasText) {
            // only text - add a single text node to the element with the full text
            $root->appendChild(new \DOMText($fullText));
            return;
        }

        if (!$hasChildNode) {
            // empty node. Nothing to do
            return;
        }

        /* Element contains only child nodes - add indentation before each one, and
         * format child elements.
         */
        $childIndentation = $indentBase.' ';
        foreach ($childNodes as $node) {
            // add indentation before node
            $root->insertBefore(new \DOMText("\n".$childIndentation), $node);

            // format child elements
            if ($node instanceof \DOMElement) {
                self::formatDOMElement($node, $childIndentation);
            }
        }

        // add indentation before closing tag
        $root->appendChild(new \DOMText("\n".$indentBase));
    }


    /**
     * Format an XML string.
     *
     * This function formats an XML string using the formatDOMElement() function. Only the document (root) element
     * is formatted and returned; anything preceding it, such as comments or processing instructions, is ignored.
     *
     * @param string $xml An XML string which should be formatted.
     * @param string $indentBase Optional indentation which should be applied to all the output. Optional, defaults
     *     to ''.
     *
     * @return string The formatted string.
     * @throws \SimpleSAML_Error_Exception If the input does not parse correctly as an XML string or parameters are not
     *     strings.
     *
     * @author Olav Morken, UNINETT AS <olav.morken@uninett.no>
     */
    public static function formatXMLString($xml, $indentBase = '')
    {
        if (!is_string($xml) || !is_string($indentBase)) {
            throw new \SimpleSAML_Error_Exception('Invalid input parameters');
        }

        $doc = new \DOMDocument();
        if (!$doc->loadXML($xml)) {
            throw new \SimpleSAML_Error_Exception('Error parsing XML string.');
        }

        // use the document element instead of the first child: the first child of a document may be a comment or a
        // processing instruction, which formatDOMElement() cannot accept
        $root = $doc->documentElement;
        self::formatDOMElement($root, $indentBase);

        return $doc->saveXML($root);
    }


    /**
     * This function finds direct descendants of a DOM element with the specified
     * localName and namespace. They are returned in an array.
     *
     * This function accepts the same shortcuts for namespaces as the isDOMElementOfType function.
     *
     * @param \DOMElement $element The element we should look in.
     * @param string $localName The name the element should have.
     * @param string $namespaceURI The namespace the element should have.
     *
     * @return array Array with the matching elements in the order they are found. An empty array is
     *         returned if no elements match.
     */
    public static function getDOMChildren(\DOMElement $element, $localName, $namespaceURI)
    {
        // expression assertions: string assertions are deprecated and no longer evaluated by recent PHP versions
        assert(is_string($localName));
        assert(is_string($namespaceURI));

        $ret = array();

        for ($i = 0; $i < $element->childNodes->length; $i++) {
            $child = $element->childNodes->item($i);

            // skip text nodes and comment elements
            if ($child instanceof \DOMText || $child instanceof \DOMComment) {
                continue;
            }

            if (self::isDOMElementOfType($child, $localName, $namespaceURI) === true) {
                $ret[] = $child;
            }
        }

        return $ret;
    }


    /**
     * This function extracts the text from DOMElements which should contain only text content.
     *
     * @param \DOMElement $element The element we should extract text from.
     *
     * @return string The text content of the element, with leading and trailing whitespace removed.
     * @throws \SimpleSAML_Error_Exception If the element contains a non-text child node.
     *
     * @author Olav Morken, UNINETT AS <olav.morken@uninett.no>
     */
    public static function getDOMText(\DOMElement $element)
    {
        // kept in addition to the type hint in the signature
        if (!($element instanceof \DOMElement)) {
            throw new \SimpleSAML_Error_Exception('Invalid input parameters');
        }

        $txt = '';

        for ($i = 0; $i < $element->childNodes->length; $i++) {
            $child = $element->childNodes->item($i);
            if (!($child instanceof \DOMText)) {
                throw new \SimpleSAML_Error_Exception($element->localName.' contained a non-text child node.');
            }

            $txt .= $child->wholeText;
        }

        $txt = trim($txt);
        return $txt;
    }


    /**
     * This function checks if the DOMElement has the correct localName and namespaceURI.
     *
     * We also define the following shortcuts for namespaces:
     * - '@ds':      'http://www.w3.org/2000/09/xmldsig#'
     * - '@md':      'urn:oasis:names:tc:SAML:2.0:metadata'
     * - '@saml1':   'urn:oasis:names:tc:SAML:1.0:assertion'
     * - '@saml1md': 'urn:oasis:names:tc:SAML:profiles:v1metadata'
     * - '@saml1p':  'urn:oasis:names:tc:SAML:1.0:protocol'
     * - '@saml2':   'urn:oasis:names:tc:SAML:2.0:assertion'
     * - '@saml2p':  'urn:oasis:names:tc:SAML:2.0:protocol'
     * - '@shibmd':  'urn:mace:shibboleth:metadata:1.0'
     *
     * @param \DOMNode $element The element we should check.
     * @param string $name The local name the element should have.
     * @param string $nsURI The namespaceURI the element should have.
     *
     * @return boolean True if both namespace and local name matches, false otherwise.
     * @throws \SimpleSAML_Error_Exception If the namespace shortcut is unknown.
     *
     * @author Andreas Solberg, UNINETT AS <andreas.solberg@uninett.no>
     * @author Olav Morken, UNINETT AS <olav.morken@uninett.no>
     */
    public static function isDOMElementOfType(\DOMNode $element, $name, $nsURI)
    {
        if (!($element instanceof \DOMElement) || !is_string($name) || !is_string($nsURI) || strlen($nsURI) === 0) {
            // most likely a comment-node
            return false;
        }

        // check if the namespace is a shortcut, and expand it if it is
        if ($nsURI[0] === '@') {
            // the defined shortcuts
            $shortcuts = array(
                '@ds'      => 'http://www.w3.org/2000/09/xmldsig#',
                '@md'      => 'urn:oasis:names:tc:SAML:2.0:metadata',
                '@saml1'   => 'urn:oasis:names:tc:SAML:1.0:assertion',
                '@saml1md' => 'urn:oasis:names:tc:SAML:profiles:v1metadata',
                '@saml1p'  => 'urn:oasis:names:tc:SAML:1.0:protocol',
                '@saml2'   => 'urn:oasis:names:tc:SAML:2.0:assertion',
                '@saml2p'  => 'urn:oasis:names:tc:SAML:2.0:protocol',
                '@shibmd'  => 'urn:mace:shibboleth:metadata:1.0',
            );

            // check if it is a valid shortcut
            if (!array_key_exists($nsURI, $shortcuts)) {
                throw new \SimpleSAML_Error_Exception('Unknown namespace shortcut: '.$nsURI);
            }

            // expand the shortcut
            $nsURI = $shortcuts[$nsURI];
        }

        if ($element->localName !== $name) {
            return false;
        }
        if ($element->namespaceURI !== $nsURI) {
            return false;
        }
        return true;
    }


    /**
     * This function attempts to validate an XML string against the specified schema. It will parse the string into a
     * DOM document and validate this document against the schema.
     *
     * Note that this function returns values that are evaluated as a logical true, both when validation works and when
     * it doesn't. Please use strict comparisons to check the values returned.
     *
     * @param string|\DOMDocument $xml The XML string or document which should be validated.
     * @param string $schema The filename of the schema that should be used to validate the document.
     *
     * @return boolean|string Returns a string with errors found if parsing or validation fails. True if validation
     *     passes ok.
     * @throws \InvalidArgumentException If $schema is not a string, or $xml is neither a string nor a \DOMDocument.
     *
     * @author Olav Morken, UNINETT AS <olav.morken@uninett.no>
     */
    public static function isValid($xml, $schema)
    {
        if (!(is_string($schema) && (is_string($xml) || $xml instanceof \DOMDocument))) {
            throw new \InvalidArgumentException('Invalid input parameters.');
        }

        // every path below must call end() exactly once to match this begin()
        \SimpleSAML_XML_Errors::begin();

        if ($xml instanceof \DOMDocument) {
            // already parsed, nothing to load
            $dom = $xml;
            $res = true;
        } else {
            $dom = new \DOMDocument;
            $res = $dom->loadXML($xml);
        }

        if ($res) {
            // the schema file name is resolved relative to the 'schemas' path given by the configuration
            $config = \SimpleSAML_Configuration::getInstance();
            $schemaPath = $config->resolvePath('schemas').'/';
            $schemaFile = $schemaPath.$schema;

            $res = $dom->schemaValidate($schemaFile);
            if ($res) {
                \SimpleSAML_XML_Errors::end();
                return true;
            }

            $errorText = "Schema validation failed on XML string:\n";
        } else {
            $errorText = "Failed to parse XML string for schema validation:\n";
        }

        $errors = \SimpleSAML_XML_Errors::end();
        $errorText .= \SimpleSAML_XML_Errors::formatErrors($errors);

        return $errorText;
    }
}