powered by nequal
Home » Diggin_Scraper_Adapter_Htmlscraping » Timeline » 2454

Changeset 2454 -- 2011-04-20 23:10:56

Author
させざき
Comment
add vendor2 under tests (force)

Files


Diffs

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor2/HTMLParser.class.php

@@ -0,0 +1,743 @@
+<?php
+
+/**
+ * ---------------------------------------------------------------------
+ * HTMLParser class
+ * ---------------------------------------------------------------------
+ * PHP versions 4 and 5
+ * ---------------------------------------------------------------------
+ * LICENSE: This source file is subject to the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * either version 2.1 of the License, or any later version
+ * that is available through the world-wide-web at the following URI:
+ * http://www.gnu.org/licenses/lgpl.html
+ * If you did not have a copy of the GNU Lesser General Public License
+ * and are unable to obtain it through the web, please write to
+ * the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ---------------------------------------------------------------------
+ */
+
+require_once('XML/HTMLSax3.php');
+
+/**
+ * HTMLParser class
+ *
+ * A SAX based parser using PEAR XML_HTMLSax3 class
+ * helps you to have a XML compliant document
+ * from malformed markups such as HTML.
+ *
+ * @version    1.2.1 (stable) issued May 17, 2007
+ * @author     ucb.rcdtokyo http://www.rcdtokyo.com/ucb/
+ * @license    GNU LGPL v2.1+ http://www.gnu.org/licenses/lgpl.html
+ * @see        http://pear.php.net/package/XML_HTMLSax3
+ *
+ * Basic usage:
+ * <code>
+ * $source = '<HTML><FOO><P ALIGN=CENTER FOO=BAR>FOO&BAR<BAR>';
+ * $parser = new HTMLParser;
+ * $parser->setRoot('html');
+ * $parser->setGenericParent('body');
+ * $parser->setRule(array(
+ *   'html' => array(
+ *     'children' => array('body'),
+ *     'attributes' => array('xmlns', 'xml:lang'),
+ *     'default_child' => 'body'),
+ *   'body' => array(
+ *     'children' => array('p'),
+ *     'attributes' => array('id', 'class', 'style', 'xml:lang')),
+ *   'p' => array(
+ *     'children' => array('#PCDATA'),
+ *     'attributes' => array('id', 'class', 'style', 'xml:lang', 'align')),
+ * ));
+ * $parser->parse($source);
+ * $result = $parser->dump();
+ * // $result is:
+ * // <html><body><p align="CENTER">FOO&amp;BAR</p></body></html>
+ * </code>
+ */
+class HTMLParser
+{
+    /**
+     * Rule set installed by setRule() or setRuleFile(), keyed by element name.
+     * The methods of this class read the following per-element keys:
+     * 'children', 'attributes', 'type' ('unique', 'inline' or 'block'),
+     * 'replace', 'default_child', 'default_parent',
+     * 'ignore_invalid_children' and 'required_attribute'.
+     *
+     * @var array
+     * @access protected
+     */
+    var $dtd = array();
+
+    /**
+     * Output fragments keyed by element name. Entries are created by
+     * _switchCurrentNode() and concatenated by dump().
+     *
+     * @var array
+     * @access protected
+     */
+    var $construct = array();
+
+    /**
+     * The output fragment currently being written to. _switchCurrentNode()
+     * binds it by reference to one entry of $construct.
+     *
+     * @var string
+     * @access protected
+     */
+    var $current_construct = '';
+
+    /**
+     * Stacks of currently open element names, keyed like $construct.
+     * New names are added at index 0 (see array_unshift() in _appendNode()),
+     * so index 0 is the innermost open element.
+     *
+     * @var array
+     * @access protected
+     */
+    var $node = array();
+
+    /**
+     * The stack of open elements currently in use. _switchCurrentNode()
+     * binds it by reference to one entry of $node.
+     *
+     * @var array
+     * @access protected
+     */
+    var $current_node = array();
+
+    /**
+     * Root element as stored by setRoot(): index 0 is the element name,
+     * index 1 the attribute array.
+     *
+     * @var array
+     * @access protected
+     */
+    var $root = array();
+
+    /**
+     * Element names whose attributes openHandler() records through
+     * _saveTag(). Filled by setTagsToSave().
+     *
+     * @var array
+     * @access protected
+     */
+    var $tags_to_save = array();
+
+    /**
+     * Recorded attributes: element name => list of attribute arrays.
+     * Returned by getSavedTags().
+     *
+     * @var array
+     * @access protected
+     */
+    var $saved_tags = array();
+
+    /**
+     * Fallback parent element name used by _elementLookup() when an element
+     * has no 'default_parent' rule. Set by setGenericParent().
+     *
+     * @var string
+     * @access protected
+     */
+    var $generic_parent;
+
+    /**
+     * Attributes minimized in HTML.
+     * When one of these arrives with an empty value, _checkAttributes()
+     * uses the attribute name itself as the value.
+     *
+     * @var array
+     * @access protected
+     */
+    var $html_minimized_attributes = array(
+        'checked', 'compact', 'controls', 'declare', 'defer', 'disabled',
+        'ismap', 'mayscript', 'multiple', 'nohref', 'noshade', 'nowrap',
+        'readonly', 'selected', 'utn', 'wrap'
+    );
+
+    /**
+     * PHP 4 style constructor (method named after the class).
+     * It only forwards to __construct().
+     *
+     * @access public
+     */
+    function HTMLParser()
+    {
+        $this->__construct();
+    }
+
+    /**
+     * Constructor. The body is empty: all state comes from the property
+     * defaults and from the set*() methods.
+     *
+     * @access public
+     */
+    function __construct()
+    {
+    }
+
+    /**
+     * Feeds the given markup to a fresh XML_HTMLSax3 instance with this
+     * object registered as the receiver of the element, data and escape
+     * callbacks. The result is collected in this object's state and can
+     * be retrieved with dump().
+     *
+     * @param  string  $data  Markup to parse.
+     * @return void
+     * @access public
+     */
+    function parse($data)
+    {
+        $sax = new XML_HTMLSax3();
+        $sax->set_object($this);
+        $sax->set_element_handler('openHandler', 'closeHandler');
+        $sax->set_data_handler('dataHandler');
+        $sax->set_escape_handler('escapeHandler');
+        $sax->set_option('XML_OPTION_TRIM_DATA_NODES', 0);
+        $sax->parse($data);
+    }
+
+    /**
+     * Callback for opening tags.
+     *
+     * Elements without a rule, and elements rejected by _checkAttributes(),
+     * are ignored. Otherwise the element is placed into the output,
+     * supplying the configured root element first when nothing is open yet.
+     *
+     * @param  object  $parser   The SAX parser (unused).
+     * @param  string  $name     Element name as found in the source.
+     * @param  array   $attribs  Attribute name => value.
+     * @return void
+     * @access public
+     */
+    function openHandler(&$parser, $name, $attribs)
+    {
+        $name = strtolower($name);
+        // Unknown elements are dropped silently.
+        if (!isset($this->dtd[$name])) {
+            return;
+        }
+        if (isset($this->dtd[$name]['replace'])) {
+            $name = $this->dtd[$name]['replace'];
+        }
+        // Filters $attribs in place; false means a required attribute is missing.
+        if (!$this->_checkAttributes($name, $attribs)) {
+            return;
+        }
+        if (in_array($name, $this->tags_to_save)) {
+            $this->_saveTag($name, $attribs);
+        }
+        if (empty($this->current_node)) {
+            // Nothing is open yet: the document has to start with the root.
+            if ($name == $this->root[0]) {
+                $this->_appendNode($name, $attribs);
+            } else {
+                $root_attribs = isset($this->root[1])? $this->root[1]: array();
+                $this->_appendNode($this->root[0], $root_attribs);
+                $this->_elementLookup($name, $attribs);
+            }
+            return;
+        }
+        if (!isset($this->node[$name])) {
+            $this->_elementLookup($name, $attribs);
+            return;
+        }
+        // A 'unique' element that already exists: if it was emitted as a
+        // bare "<name>", upgrade that tag with the attributes seen now.
+        $bare_tag = "<$name>";
+        if (isset($this->dtd[$name]['type'])
+            and $this->dtd[$name]['type'] == 'unique'
+            and !empty($attribs)
+            and false !== strpos($this->current_construct, $bare_tag)) {
+            $this->current_construct = str_replace(
+                $bare_tag,
+                $this->_formatOpenTag($name, $attribs),
+                $this->current_construct
+            );
+        }
+    }
+
+    /**
+     * Callback for closing tags.
+     *
+     * Closing tags of unknown elements and of 'unique' elements are ignored,
+     * as is any closing tag seen while no element is open.
+     *
+     * @param  object  $parser  The SAX parser (unused).
+     * @param  string  $name    Element name as found in the source.
+     * @return void
+     * @access public
+     */
+    function closeHandler(&$parser, $name)
+    {
+        $name = strtolower(trim($name));
+        if (!isset($this->dtd[$name]) or empty($this->current_node)) {
+            return;
+        }
+        if (isset($this->dtd[$name]['replace'])) {
+            $name = $this->dtd[$name]['replace'];
+        }
+        if (isset($this->dtd[$name]['type']) and $this->dtd[$name]['type'] == 'unique') {
+            return;
+        }
+        // Case 1: the innermost open element is the one being closed.
+        if ($name == $this->current_node[0]) {
+            $closed = array_shift($this->current_node);
+            $this->current_construct .= "</$closed>";
+            return;
+        }
+        // Case 2: the element is open further out in the current stack.
+        $positions = array_keys($this->current_node, $name);
+        if ($positions) {
+            $depth = $positions[0];
+            $inner = array_slice($this->current_node, 0, $depth);
+            // Do not close across the element's own default parent.
+            $blocked = (isset($this->dtd[$name]['default_parent'])
+                and in_array($this->dtd[$name]['default_parent'], $inner));
+            if (!$blocked) {
+                // Close everything nested inside it, then the element itself.
+                for ($remaining = $depth + 1; $remaining > 0; $remaining--) {
+                    $closed = array_shift($this->current_node);
+                    $this->current_construct .= "</$closed>";
+                }
+            }
+            return;
+        }
+        // Case 3: look for another stack whose innermost element matches.
+        foreach ($this->node as $key => $stack) {
+            if ($name == $stack[0]) {
+                $this->_switchCurrentNode($key);
+                $closed = array_shift($this->current_node);
+                $this->current_construct .= "</$closed>";
+                break;
+            }
+        }
+    }
+
+    /**
+     * Callback for character data (#PCDATA) in elements.
+     *
+     * Leading and trailing tabs, carriage returns and line feeds are
+     * removed; spaces are kept. Data that is empty afterwards is ignored.
+     * If nothing is open yet, the configured root element is supplied first.
+     *
+     * @param  object  $parser  The SAX parser (unused).
+     * @param  string  $data    Character data.
+     * @return void
+     * @access public
+     */
+    function dataHandler(&$parser, $data)
+    {
+        // trim() with an explicit character list is used instead of the
+        // former pattern /^[\t\r\n]*(.*)[\t\r\n]*$/, which had no "s"
+        // modifier: data containing an inner line break never matched and
+        // was left untrimmed, and the greedy ".*" kept trailing tabs and
+        // carriage returns.
+        $data = trim($data, "\t\r\n");
+        if (strlen($data) > 0) {
+            if (empty($this->current_node)) {
+                $this->_appendNode(
+                    $this->root[0],
+                    (isset($this->root[1])? $this->root[1]: array())
+                );
+            }
+            $this->_dataLookup($data);
+        }
+    }
+
+    /**
+     * Callback for XML escapes (DOCTYPE declaration, comment and CDATA
+     * section). The escape is written back to the current output fragment
+     * as "<!...>". For data starting with two or more hyphens, any run of
+     * trailing hyphens is replaced by exactly "--".
+     *
+     * @param  object  $parser  The SAX parser (unused).
+     * @param  string  $data    Escape content without the "<!" and ">".
+     * @return void
+     * @access public
+     */
+    function escapeHandler(&$parser, $data)
+    {
+        $normalized = preg_replace('/^(-{2,}.*?)-*$/s', '$1--', $data);
+        $this->current_construct .= '<!'.$normalized.'>';
+    }
+
+    /**
+     * Callback for processing instructions.
+     * The body is empty, so processing instructions never reach the output.
+     * Note that parse() does not register this method with the SAX parser.
+     *
+     * @param  object  $parser
+     * @param  string  $target
+     * @param  string  $data
+     * @return void
+     * @access public
+     */
+    function piHandler(&$parser, $target, $data)
+    {
+    }
+
+    /**
+     * Callback for JSP/ASP tags.
+     * The body is empty, so such tags never reach the output.
+     * Note that parse() does not register this method with the SAX parser.
+     *
+     * @param  object  $parser
+     * @param  string  $data
+     * @return void
+     * @access public
+     */
+    function jaspHandler(&$parser, $data)
+    {
+    }
+
+    /**
+     * Installs the rule set that drives parsing.
+     *
+     * @param  array   $rule  Element name => rule array (keys such as
+     *                        'children' and 'attributes').
+     * @return void
+     * @access public
+     */
+    function setRule($rule)
+    {
+        // Bound by reference to the local copy of the argument.
+        $this->dtd =& $rule;
+    }
+
+    /**
+     * Installs the rule set returned by a PHP file.
+     * The file is loaded with require and whatever it returns becomes
+     * the rule set, so it is expected to end in "return array(...);".
+     *
+     * @param  string  $filename  Path of the PHP file to load.
+     * @return void
+     * @access public
+     */
+    function setRuleFile($filename)
+    {
+        $rule = require $filename;
+        $this->dtd = $rule;
+    }
+
+    /**
+     * Assembles the parsed document as a string.
+     *
+     * The root fragment comes first, followed by every other fragment with
+     * its still-open elements closed, and finally the closing tags of the
+     * elements still open in the root fragment.
+     *
+     * The method does not modify the parser state, so it may be called
+     * repeatedly. It returns an empty string when no root element was set
+     * or nothing has been collected for it.
+     *
+     * @param  string  $encoding  Target encoding; conversion from UTF-8
+     *                            happens only if mbstring is available.
+     * @return string
+     * @access public
+     */
+    function dump($encoding = 'UTF-8')
+    {
+        $root = isset($this->root[0])? (string) $this->root[0]: null;
+        $result = '';
+        if (null !== $root and isset($this->construct[$root])) {
+            $result = $this->construct[$root];
+        }
+        foreach ($this->construct as $key => $fragment) {
+            // The root fragment was emitted above. It is skipped here rather
+            // than unset, so that a later dump() sees the same state.
+            if (null !== $root and (string) $key === $root) {
+                continue;
+            }
+            $result .= $fragment;
+            if (!empty($this->node[$key])) {
+                foreach ($this->node[$key] as $open) {
+                    $result .= "</$open>";
+                }
+            }
+        }
+        if (null !== $root and !empty($this->node[$root])) {
+            foreach ($this->node[$root] as $open) {
+                $result .= "</$open>";
+            }
+        }
+        if ($encoding != 'UTF-8'
+            and function_exists('mb_convert_variables')) {
+            mb_convert_variables($encoding, 'UTF-8', $result);
+        }
+        return $result;
+    }
+
+    /**
+     * Specify the root element of the document.
+     * If the first element of the parsed document is not the root element
+     * specified here, the root element is supplied automatically before it.
+     * This helps when a fully XML compliant output, which must have a
+     * single root element, is needed.
+     * Note: the attributes given here are NOT evaluated, i.e. they are
+     * handed to the output without passing through _checkAttributes().
+     *
+     * Example:
+     * <code>
+     * $parser->setRoot(
+     *   'html',
+     *   array(
+     *     'xmlns' => 'http://www.w3.org/1999/xhtml',
+     *     'xml:lang' => 'ja'
+     *   )
+     * );
+     * </code>
+     *
+     * @param  string  $name     Root element name.
+     * @param  array   $attribs  Attribute name => value for the root element.
+     * @return void
+     * @access public
+     */
+    function setRoot($name, $attribs = array())
+    {
+        $this->root = array($name, $attribs);
+    }
+
+    /**
+     * Sets the fallback parent element. _elementLookup() uses it for an
+     * element that cannot be placed otherwise and has no 'default_parent'
+     * rule of its own.
+     *
+     * @param  string  $name  Element name.
+     * @return void
+     * @access public
+     */
+    function setGenericParent($name)
+    {
+        $this->generic_parent = $name;
+    }
+
+    /**
+     * Registers element names whose attributes are to be recorded while
+     * parsing. Accepts any number of arguments; each one is either a
+     * single name or an array of names. Calls are cumulative.
+     *
+     * @param  mixed   Element name, or array of element names (variadic).
+     * @return void
+     * @access public
+     */
+    function setTagsToSave()
+    {
+        foreach (func_get_args() as $tags) {
+            if (!is_array($tags)) {
+                $this->tags_to_save[] = $tags;
+                continue;
+            }
+            $this->tags_to_save = array_merge($this->tags_to_save, $tags);
+        }
+    }
+
+    /**
+     * Returns the attributes recorded for the elements registered with
+     * setTagsToSave(), as element name => list of attribute arrays.
+     *
+     * The conversion is applied to a copy, so the recorded data stays in
+     * UTF-8 and repeated calls return consistent results.
+     *
+     * @param  string  $encoding  Target encoding; conversion from UTF-8
+     *                            happens only if mbstring is available.
+     * @return array
+     * @access public
+     */
+    function getSavedTags($encoding = 'UTF-8')
+    {
+        // mb_convert_variables() rewrites its argument in place. It must
+        // not be handed $this->saved_tags directly, otherwise a second call
+        // would convert data that has already been converted.
+        $tags = $this->saved_tags;
+        if ($encoding != 'UTF-8'
+            and function_exists('mb_convert_variables')) {
+            mb_convert_variables($encoding, 'UTF-8', $tags);
+        }
+        return $tags;
+    }
+
+    /**
+     * Makes the stack and output fragment stored under $name the current
+     * ones, creating them on first use. A new fragment starts with the
+     * opening tag of $name; a new stack starts with $name as its only entry.
+     *
+     * @param  string  $name     Element name used as the key.
+     * @param  array   $attribs  Attributes for the opening tag; only used
+     *                           when the fragment is created.
+     * @return void
+     * @access protected
+     */
+    function _switchCurrentNode($name, $attribs = array())
+    {
+        if (!isset($this->node[$name])) {
+            $this->node[$name] = array($name);
+        }
+        if (!isset($this->construct[$name])) {
+            $this->construct[$name] = $this->_formatOpenTag($name, $attribs);
+        }
+        // Both are bound by reference, so later writes to the "current"
+        // properties land in the stored entries.
+        $this->current_node =& $this->node[$name];
+        $this->current_construct =& $this->construct[$name];
+    }
+
+    /**
+     * Finds a valid place for an element relative to the innermost open
+     * element and appends it there. The strategies are tried in order and
+     * the first one that applies wins; if none applies the element is
+     * dropped.
+     *
+     * @param  string  $name     Element name.
+     * @param  array   $attribs  Attribute name => value.
+     * @return void
+     * @access protected
+     */
+    function _elementLookup($name, $attribs)
+    {
+        $parent = $this->current_node[0];
+        // 1. The innermost open element accepts it directly.
+        if (in_array($name, $this->dtd[$parent]['children'])) {
+            $this->_appendNode($name, $attribs);
+            return;
+        }
+        // 2. The open element ignores invalid children: close it and append
+        //    only if its default parent accepts the new element.
+        if (isset($this->dtd[$parent]['ignore_invalid_children'])) {
+            if (isset($this->dtd[$parent]['default_parent'])) {
+                $outer = $this->dtd[$parent]['default_parent'];
+                if (in_array($name, $this->dtd[$outer]['children'])) {
+                    $this->_appendNode($name, $attribs, 1);
+                }
+            }
+            return;
+        }
+        $has_default_child = isset($this->dtd[$parent]['default_child']);
+        // 3. The element's default parent is the open element's default
+        //    child: supply that parent, then the element.
+        if (isset($this->dtd[$name]['default_parent'])
+            and $has_default_child
+            and $this->dtd[$name]['default_parent'] == $this->dtd[$parent]['default_child']) {
+            $this->_appendNode($this->dtd[$name]['default_parent']);
+            $this->_appendNode($name, $attribs);
+            return;
+        }
+        // 4. Follow the chain of default children; if its last link accepts
+        //    the element, open the whole chain and append.
+        if ($has_default_child) {
+            $chain = array();
+            $link = $parent;
+            while (isset($this->dtd[$link]['default_child'])) {
+                $link = $this->dtd[$link]['default_child'];
+                $chain[] = $link;
+            }
+            if (in_array($name, $this->dtd[$link]['children'])) {
+                foreach ($chain as $element) {
+                    $this->_appendNode($element);
+                }
+                $this->_appendNode($name, $attribs);
+                return;
+            }
+        }
+        // 5. Look for an accepting ancestor among the open elements.
+        if ($this->_lookupNodeTree($name, $attribs)) {
+            return;
+        }
+        // 6. Fall back to the element's default parent or the generic parent.
+        if (isset($this->dtd[$name]['default_parent'])) {
+            $fallback = $this->dtd[$name]['default_parent'];
+        } elseif (isset($this->generic_parent)) {
+            $fallback = $this->generic_parent;
+        } else {
+            $fallback = null;
+        }
+        if (!$fallback) {
+            return;
+        }
+        if (isset($this->dtd[$fallback]['type']) and $this->dtd[$fallback]['type'] == 'unique') {
+            $this->_switchCurrentNode($fallback);
+            $this->_appendNode($name, $attribs);
+        } elseif ($this->_lookupNodeTree($fallback)) {
+            $this->_appendNode($name, $attribs);
+        }
+    }
+
+    /**
+     * Appends character data to the innermost open element if it accepts
+     * '#PCDATA'. Otherwise, for non-blank data, it opens the chain of
+     * default children when the last link of that chain accepts '#PCDATA'.
+     * Data that cannot be placed is dropped.
+     *
+     * @param  string  $data  Character data.
+     * @return void
+     * @access protected
+     */
+    function _dataLookup($data)
+    {
+        $parent = $this->current_node[0];
+        if (in_array('#PCDATA', $this->dtd[$parent]['children'])) {
+            $this->_appendData($data);
+            return;
+        }
+        // Whitespace-only data is never worth opening elements for.
+        if (preg_match('/^\s*$/s', $data)
+            or isset($this->dtd[$parent]['ignore_invalid_children'])
+            or !isset($this->dtd[$parent]['default_child'])) {
+            return;
+        }
+        $chain = array();
+        $link = $parent;
+        while (isset($this->dtd[$link]['default_child'])) {
+            $link = $this->dtd[$link]['default_child'];
+            $chain[] = $link;
+        }
+        if (in_array('#PCDATA', $this->dtd[$link]['children'])) {
+            foreach ($chain as $element) {
+                $this->_appendNode($element);
+            }
+            $this->_appendData($data);
+        }
+    }
+
+    /**
+     * Walks the open elements from the innermost outwards, looking for one
+     * that accepts $name as a child, and places the element there.
+     *
+     * The walk is abandoned when an inline element would have to leave a
+     * block element, or when the default parent of the element's default
+     * parent is reached.
+     *
+     * @param  string  $name     Element name.
+     * @param  array   $attribs  Attribute name => value.
+     * @return boolean True if the element was placed, false otherwise.
+     * @access protected
+     */
+    function _lookupNodeTree($name, $attribs = array())
+    {
+        $depth = count($this->current_node);
+        for ($i = 0; $i < $depth; $i++) {
+            $ancestor = $this->current_node[$i];
+            if (in_array($name, $this->dtd[$ancestor]['children'])) {
+                // If the element just inside the ancestor may itself be a
+                // child of the new element, wrap it; otherwise close down
+                // to the ancestor and append.
+                $can_wrap_inner = (isset($this->current_node[$i -1])
+                    and in_array($this->current_node[$i -1], $this->dtd[$name]['children']));
+                if ($can_wrap_inner) {
+                    $this->_insertNode($name, $attribs, $i);
+                } else {
+                    $this->_appendNode($name, $attribs, $i);
+                }
+                return true;
+            }
+            $inline_inside_block = (isset($this->dtd[$name]['type'])
+                and $this->dtd[$name]['type'] == 'inline'
+                and isset($this->dtd[$ancestor]['type'])
+                and $this->dtd[$ancestor]['type'] == 'block');
+            $reached_outer_parent = (isset($this->dtd[$name]['default_parent'])
+                and isset($this->dtd[$this->dtd[$name]['default_parent']]['default_parent'])
+                and $ancestor == $this->dtd[$this->dtd[$name]['default_parent']]['default_parent']);
+            if ($inline_inside_block or $reached_outer_parent) {
+                return false;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Writes the opening tag of an element to the current output fragment.
+     *
+     * 'unique' elements are not written inline; they get their own
+     * fragment through _switchCurrentNode(). For other elements the
+     * $position innermost open elements are closed first.
+     *
+     * @param  string  $name      Element name.
+     * @param  array   $attribs   Attribute name => value.
+     * @param  integer $position  Number of open elements to close first.
+     * @return void
+     * @access protected
+     */
+    function _appendNode($name, $attribs = array(), $position = 0)
+    {
+        $is_unique = (isset($this->dtd[$name]['type'])
+            and $this->dtd[$name]['type'] == 'unique');
+        if ($is_unique) {
+            $this->_switchCurrentNode($name, $attribs);
+            return;
+        }
+        for ($to_close = $position; $to_close > 0; $to_close--) {
+            $this->current_construct .= '</'.$this->current_node[0].'>';
+            array_shift($this->current_node);
+        }
+        $this->current_construct .= $this->_formatOpenTag($name, $attribs);
+        // An element without children was written self-closed, so it must
+        // not be pushed onto the stack of open elements.
+        if (!empty($this->dtd[$name]['children'])) {
+            array_unshift($this->current_node, $name);
+        }
+    }
+
+    /**
+     * Inserts the opening tag of $name into the already generated output,
+     * directly in front of the opening tag of the open element stored at
+     * $position - 1 of the stack, so that the new element wraps it.
+     * The new element is also spliced into the stack at $position.
+     *
+     * @param  string  $name      Element name to insert.
+     * @param  array   $attribs   Attribute name => value.
+     * @param  integer $position  Stack index of the accepting ancestor.
+     * @return void
+     * @access protected
+     */
+    function _insertNode($name, $attribs = array(), $position = 0)
+    {
+        // The element to be wrapped: the one just inside the ancestor.
+        $tag = $this->current_node[$position -1];
+        // Collect the offsets of every opening and closing tag of that
+        // element in the current output.
+        // NOTE(review): $tag is interpolated into the patterns without
+        // preg_quote(); this assumes element names have no regex
+        // metacharacters - confirm against the rule files.
+        preg_match_all("/<$tag\b[^>]*?>/", $this->current_construct, $matches, PREG_OFFSET_CAPTURE);
+        $tags = $matches[0];
+        preg_match_all("/<\/$tag>/", $this->current_construct, $matches, PREG_OFFSET_CAPTURE);
+        // Pair each closing tag with an opening tag: the last opening tag
+        // before it, or, if that one is already paired, the nearest earlier
+        // unpaired one. Index 2 of an entry holds the matching close offset.
+        foreach ($matches[0] as $value) {
+            $limit = count($tags);
+            for ($i = 0; $i < $limit; $i++) {
+                if ($value[1] > $tags[$i][1]
+                    and (!isset($tags[$i +1][1]) or $value[1] < $tags[$i +1][1])) {
+                    if (!isset($tags[$i][2])) {
+                        $tags[$i][2] = $value[1];
+                    } else {
+                        for ($ii = $i; $ii >= 0; $ii--) {
+                            if (!isset($tags[$ii][2])) {
+                                $tags[$ii][2] = $value[1];
+                                break;
+                            }
+                        }
+                    }
+                    break;
+                }
+            }
+        }
+        // Drop the opening tags that have been closed; what remains are the
+        // occurrences that are still open.
+        foreach ($tags as $key => $value) {
+            if (isset($value[2])) {
+                unset($tags[$key]);
+            }
+        }
+        // Reverse (and reindex) so the most recently opened occurrence comes
+        // first, matching the innermost-first order of the stack.
+        $tags = array_reverse($tags);
+        // Stack indexes at which this element name occurs.
+        $tags_pos = array_keys($this->current_node, $tag);
+        $limit = count($tags_pos);
+        for ($i = 0; $i < $limit; $i++) {
+            if ($tags_pos[$i] == $position -1) {
+                // Insert (length 0 = nothing replaced) the new opening tag
+                // at the offset of the matching still-open tag.
+                // NOTE(review): assumes $tags has an entry for every stack
+                // occurrence; $tags[$i] is not checked with isset().
+                $this->current_construct = substr_replace(
+                    $this->current_construct,
+                    $this->_formatOpenTag($name, $attribs),
+                    $tags[$i][1],
+                    0
+                );
+                array_splice($this->current_node, $position, 0, $name);
+                break;
+            }
+        }
+    }
+
+    /**
+     * Escapes character data and appends it to the current output fragment.
+     *
+     * @param  string  $data  Raw character data.
+     * @return void
+     * @access protected
+     */
+    function _appendData($data)
+    {
+        $escaped = $data;
+        $this->_escapeChars($escaped);
+        $this->current_construct .= $escaped;
+    }
+
+    /**
+     * Filters the attributes of an element against its rule.
+     *
+     * $attribs is rewritten in place: names are lowercased, attributes not
+     * listed under the element's 'attributes' rule are removed, minimized
+     * HTML attributes with an empty value get their own name as value, and
+     * all other values are escaped. An element whose rule has no
+     * 'attributes' list keeps no attributes.
+     *
+     * @param  string  $name     Element name (must be a key of the rule set).
+     * @param  array   $attribs  Attribute name => value, modified in place.
+     * @return boolean False if the rule names a 'required_attribute' that
+     *                 is absent after filtering, true otherwise.
+     * @access protected
+     */
+    function _checkAttributes($name, &$attribs)
+    {
+        if (!empty($attribs)) {
+            // A rule without an 'attributes' list is treated as an empty
+            // whitelist instead of being passed to in_array(), which warns
+            // on PHP 5 and throws a TypeError on PHP 8 for a non-array.
+            $allowed = (isset($this->dtd[$name]['attributes'])
+                and is_array($this->dtd[$name]['attributes']))
+                ? $this->dtd[$name]['attributes']
+                : array();
+            $filtered = array();
+            foreach ($attribs as $key => $value) {
+                $key = strtolower($key);
+                if (!in_array($key, $allowed)) {
+                    continue;
+                }
+                if (empty($value)
+                    and in_array($key, $this->html_minimized_attributes)) {
+                    // e.g. <input checked> becomes checked="checked"
+                    $value = $key;
+                } else {
+                    $this->_escapeChars($value);
+                }
+                $filtered[$key] = $value;
+            }
+            $attribs = $filtered;
+        }
+        if (isset($this->dtd[$name]['required_attribute'])) {
+            return isset($attribs[$this->dtd[$name]['required_attribute']]);
+        }
+        return true;
+    }
+
+    /**
+     * Builds the opening tag of an element. Attribute values are written
+     * as given, inside double quotes. An element whose rule lists no
+     * children is written self-closed (" />").
+     *
+     * @param  string  $name     Element name.
+     * @param  array   $attribs  Attribute name => already escaped value.
+     * @return string
+     * @access protected
+     */
+    function _formatOpenTag($name, $attribs = array())
+    {
+        $is_childless = empty($this->dtd[$name]['children']);
+        $tag = '<'.$name;
+        if (!empty($attribs)) {
+            foreach ($attribs as $key => $value) {
+                $tag .= ' '.$key.'="'.$value.'"';
+            }
+        }
+        return $tag.($is_childless? ' />': '>');
+    }
+
+    /**
+     * Escapes the XML special characters of a string in place.
+     *
+     * An ampersand is left alone when it starts a character reference:
+     * a named one (a letter followed by letters or digits, e.g. &amp; or
+     * &frac12;), a decimal one (&#38;) or a hexadecimal one (&#x26;).
+     * Every other ampersand becomes &amp;. The characters <, > and "
+     * are always replaced.
+     *
+     * @param  string  $string  Text to escape, modified in place.
+     * @return void
+     * @access protected
+     */
+    function _escapeChars(&$string)
+    {
+        // Entity names may contain digits after the first letter (frac12,
+        // sup2, there4); matching letters only would escape them twice.
+        $string = preg_replace(
+            '/&(?!(?:[a-zA-Z][a-zA-Z0-9]*|#[0-9]+|#x[0-9a-fA-F]+);)/',
+            '&amp;',
+            $string
+        );
+        $string = str_replace(
+            array('<', '>', '"'),
+            array('&lt;', '&gt;', '&quot;'),
+            $string
+        );
+    }
+
+    /**
+     * Records the attributes of one occurrence of an element under its
+     * name. Occurrences without attributes are not recorded.
+     *
+     * @param  string  $name     Element name.
+     * @param  array   $attribs  Attribute name => value.
+     * @return void
+     * @access protected
+     */
+    function _saveTag($name, $attribs)
+    {
+        if (empty($attribs)) {
+            return;
+        }
+        $this->saved_tags[$name][] = $attribs;
+    }
+}
+
+?>

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor2/xhtml1-transitional_dtd.inc.php

@@ -0,0 +1,468 @@
+<?php
+return array(
+	'html' => array(
+		'type' => 'unique',
+		'children' => array('head', 'body'),
+		'attributes' => array('lang', 'xml:lang', 'dir', 'id', 'xmlns'),
+		'default_child' => 'body',
+	),
+	'head' => array(
+		'type' => 'unique',
+		'children' => array('script', 'style', 'meta', 'link', 'object', 'isindex', 'title', 'base'),
+		'attributes' => array('lang', 'xml:lang', 'dir', 'id', 'profile'),
+		'default_parent' => 'html',
+	),
+	'title' => array(
+		'children' => array('#PCDATA'),
+		'attributes' => array('lang', 'xml:lang', 'dir', 'id'),
+		'default_parent' => 'head',
+	),
+	'base' => array(
+		'children' => array(),
+		'attributes' => array('id', 'href', 'target'),
+		'default_parent' => 'head',
+	),
+	'meta' => array(
+		'children' => array(),
+		'attributes' => array('lang', 'xml:lang', 'dir', 'id', 'http-equiv', 'name', 'content', 'scheme'),
+		'default_parent' => 'head',
+	),
+	'link' => array(
+		'children' => array(),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'charset', 'href', 'hreflang', 'type', 'rel', 'rev', 'media', 'target'),
+		'default_parent' => 'head',
+	),
+	'style' => array(
+		'children' => array('#PCDATA'),
+		'attributes' => array('lang', 'xml:lang', 'dir', 'id', 'type', 'media', 'title', 'xml:space'),
+		'default_parent' => 'head',
+	),
+	'script' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA'),
+		'attributes' => array('id', 'charset', 'type', 'language', 'src', 'defer', 'xml:space'),
+		'default_parent' => 'head',
+	),
+	'noscript' => array(
+		'type' => 'block',
+		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'iframe' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'longdesc', 'name', 'src', 'frameborder', 'marginwidth', 'marginheight', 'scrolling', 'align', 'height', 'width'),
+	),
+	'noframes' => array(
+		'type' => 'block',
+		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'body' => array(
+		'type' => 'unique',
+		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'onload', 'onunload', 'background', 'bgcolor', 'text', 'link', 'vlink', 'alink'),
+		'default_parent' => 'html',
+	),
+	'div' => array(
+		'type' => 'block',
+		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align'),
+	),
+	'p' => array(
+		'type' => 'block',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align'),
+	),
+	'h1' => array(
+		'type' => 'block',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align'),
+	),
+	'h2' => array(
+		'type' => 'block',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align'),
+	),
+	'h3' => array(
+		'type' => 'block',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align'),
+	),
+	'h4' => array(
+		'type' => 'block',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align'),
+	),
+	'h5' => array(
+		'type' => 'block',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align'),
+	),
+	'h6' => array(
+		'type' => 'block',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align'),
+	),
+	'ul' => array(
+		'type' => 'block',
+		'children' => array('li'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'type', 'compact'),
+		'default_child' => 'li',
+	),
+	'ol' => array(
+		'type' => 'block',
+		'children' => array('li'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'type', 'compact', 'start'),
+		'default_child' => 'li',
+	),
+	'menu' => array(
+		'type' => 'block',
+		'children' => array('li'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'compact'),
+		'default_child' => 'li',
+	),
+	'dir' => array(
+		'type' => 'block',
+		'children' => array('li'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'compact'),
+		'default_child' => 'li',
+	),
+	'li' => array(
+		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'type', 'value'),
+		'default_parent' => 'ul',
+	),
+	'dl' => array(
+		'type' => 'block',
+		'children' => array('dt', 'dd'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'compact'),
+		'default_child' => 'dt',
+	),
+	'dt' => array(
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+		'default_parent' => 'dl',
+	),
+	'dd' => array(
+		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+		'default_parent' => 'dl',
+	),
+	'address' => array(
+		'type' => 'block',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script', 'p'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'hr' => array(
+		'type' => 'block',
+		'children' => array(),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align', 'noshade', 'size', 'width'),
+	),
+	'pre' => array(
+		'type' => 'block',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'tt', 'i', 'b', 'u', 's', 'strike', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'width', 'xml:space'),
+	),
+	'blockquote' => array(
+		'type' => 'block',
+		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'cite'),
+	),
+	'center' => array(
+		'type' => 'block',
+		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'ins' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'cite', 'datetime'),
+	),
+	'del' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'cite', 'datetime'),
+	),
+	'a' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'accesskey', 'tabindex', 'onfocus', 'onblur', 'charset', 'type', 'name', 'href', 'hreflang', 'rel', 'rev', 'shape', 'coords', 'target'),
+		'required_attribute' => 'href'
+	),
+	'span' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'bdo' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'lang', 'xml:lang', 'dir'),
+	),
+	'br' => array(
+		'type' => 'inline',
+		'children' => array(),
+		'attributes' => array('id', 'class', 'style', 'title', 'clear'),
+	),
+	'em' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'strong' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'dfn' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'code' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'samp' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'kbd' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'var' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'cite' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'abbr' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'acronym' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'q' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'cite'),
+	),
+	'sub' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'sup' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'tt' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'i' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'b' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'big' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'small' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'u' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	's' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'strike' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'basefont' => array(
+		'type' => 'inline',
+		'children' => array(),
+		'attributes' => array('id', 'size', 'color', 'face'),
+	),
+	'font' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'size', 'color', 'face'),
+	),
+	'object' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'param', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'declare', 'classid', 'codebase', 'data', 'type', 'codetype', 'archive', 'standby', 'height', 'width', 'usemap', 'name', 'tabindex', 'align', 'border', 'hspace', 'vspace'),
+	),
+	'param' => array(
+		'children' => array(),
+		'attributes' => array('id', 'name', 'value', 'valuetype', 'type'),
+	),
+	'applet' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'param', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'codebase', 'archive', 'code', 'object', 'alt', 'name', 'width', 'height', 'align', 'hspace', 'vspace'),
+	),
+	'img' => array(
+		'type' => 'inline',
+		'children' => array(),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'src', 'alt', 'name', 'longdesc', 'height', 'width', 'usemap', 'ismap', 'align', 'border', 'hspace', 'vspace'),
+		'required_attribute' => 'src'
+	),
+	'map' => array(
+		'type' => 'inline',
+		'children' => array('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'noscript', 'ins', 'del', 'script', 'area'),
+		'attributes' => array('lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'id', 'class', 'style', 'title', 'name'),
+	),
+	'area' => array(
+		'children' => array(),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'accesskey', 'tabindex', 'onfocus', 'onblur', 'shape', 'coords', 'href', 'nohref', 'alt', 'target'),
+		'default_parent' => 'map',
+		'required_attribute' => 'href'
+	),
+	'form' => array(
+		'type' => 'block',
+		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'action', 'method', 'name', 'enctype', 'onsubmit', 'onreset', 'accept', 'accept-charset', 'target'),
+	),
+	'label' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'for', 'accesskey', 'onfocus', 'onblur'),
+	),
+	'input' => array(
+		'type' => 'inline',
+		'children' => array(),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'accesskey', 'tabindex', 'onfocus', 'onblur', 'type', 'name', 'value', 'checked', 'disabled', 'readonly', 'size', 'maxlength', 'src', 'alt', 'usemap', 'onselect', 'onchange', 'accept', 'align'),
+	),
+	'select' => array(
+		'type' => 'inline',
+		'children' => array('optgroup', 'option'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'name', 'size', 'multiple', 'disabled', 'tabindex', 'onfocus', 'onblur', 'onchange'),
+		'ignore_invalid_children' => TRUE,
+		'default_child' => 'option',
+	),
+	'optgroup' => array(
+		'children' => array('option'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'disabled', 'label'),
+		'ignore_invalid_children' => TRUE,
+		'default_child' => 'option',
+	),
+	'option' => array(
+		'children' => array('#PCDATA'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'selected', 'disabled', 'label', 'value'),
+		'ignore_invalid_children' => TRUE,
+		'default_parent' => 'select',
+	),
+	'textarea' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'accesskey', 'tabindex', 'onfocus', 'onblur', 'name', 'rows', 'cols', 'disabled', 'readonly', 'onselect', 'onchange'),
+	),
+	'fieldset' => array(
+		'type' => 'block',
+		'children' => array('#PCDATA', 'legend', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
+	),
+	'legend' => array(
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'accesskey', 'align'),
+	),
+	'button' => array(
+		'type' => 'inline',
+		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'table', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'noscript', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'accesskey', 'tabindex', 'onfocus', 'onblur', 'name', 'value', 'type', 'disabled'),
+	),
+	'isindex' => array(
+		'type' => 'block',
+		'children' => array(),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'prompt'),
+	),
+	'table' => array(
+		'type' => 'block',
+		'children' => array('caption', 'col', 'colgroup', 'thead', 'tfoot', 'tbody', 'tr'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'summary', 'width', 'border', 'frame', 'rules', 'cellspacing', 'cellpadding', 'align', 'bgcolor'),
+		'default_child' => 'tr',
+	),
+	'caption' => array(
+		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align'),
+	),
+	'thead' => array(
+		'children' => array('tr'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align', 'char', 'charoff', 'valign'),
+		'default_parent' => 'table',
+		'default_child' => 'tr',
+	),
+	'tfoot' => array(
+		'children' => array('tr'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align', 'char', 'charoff', 'valign'),
+		'default_parent' => 'table',
+		'default_child' => 'tr',
+	),
+	'tbody' => array(
+		'children' => array('tr'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align', 'char', 'charoff', 'valign'),
+		'default_parent' => 'table',
+		'default_child' => 'tr',
+	),
+	'colgroup' => array(
+		'children' => array('col'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'span', 'width', 'align', 'char', 'charoff', 'valign'),
+		'default_parent' => 'table',
+		'default_child' => 'col',
+	),
+	'col' => array(
+		'children' => array(),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'span', 'width', 'align', 'char', 'charoff', 'valign'),
+		'default_parent' => 'colgroup',
+	),
+	'tr' => array(
+		'children' => array('th', 'td'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align', 'char', 'charoff', 'valign', 'bgcolor'),
+		'default_parent' => 'table',
+		'default_child' => 'td',
+	),
+	'th' => array(
+		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'abbr', 'axis', 'headers', 'scope', 'rowspan', 'colspan', 'align', 'char', 'charoff', 'valign', 'nowrap', 'bgcolor', 'width', 'height'),
+		'default_parent' => 'tr',
+	),
+	'td' => array(
+		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
+		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'abbr', 'axis', 'headers', 'scope', 'rowspan', 'colspan', 'align', 'char', 'charoff', 'valign', 'nowrap', 'bgcolor', 'width', 'height'),
+		'default_parent' => 'tr',
+	),
+);
+?>

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/HTMLParser.class.php

@@ -1,743 +0,0 @@
-<?php
-
-/**
- * ---------------------------------------------------------------------
- * HTMLParser class
- * ---------------------------------------------------------------------
- * PHP versions 4 and 5
- * ---------------------------------------------------------------------
- * LICENSE: This source file is subject to the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * either version 2.1 of the License, or any later version
- * that is available through the world-wide-web at the following URI:
- * http://www.gnu.org/licenses/lgpl.html
- * If you did not have a copy of the GNU Lesser General Public License
- * and are unable to obtain it through the web, please write to
- * the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ---------------------------------------------------------------------
- */
-
-require_once('XML/HTMLSax3.php');
-
-/**
- * HTMLParser class
- *
- * A SAX based parser using PEAR XML_HTMLSax3 class
- * helps you to have a XML compliant document
- * from malformed markups such as HTML.
- *
- * @version    1.2.1 (stable) issued May 17, 2007
- * @author     ucb.rcdtokyo http://www.rcdtokyo.com/ucb/
- * @license    GNU LGPL v2.1+ http://www.gnu.org/licenses/lgpl.html
- * @see        http://pear.php.net/package/XML_HTMLSax3
- *
- * Basic usage:
- * <code>
- * $source = '<HTML><FOO><P ALIGN=CENTER FOO=BAR>FOO&BAR<BAR>';
- * $parser = new HTMLParser;
- * $parser->setRoot('html');
- * $parser->setGenericParent('body');
- * $parser->setRule(array(
- *   'html' => array(
- *     'children' => array('body'),
- *     'attributes' => array('xmlns', 'xml:lang'),
- *     'default_child' => 'body'),
- *   'body' => array(
- *     'children' => array('p'),
- *     'attributes' => array('id', 'class', 'style', 'xml:lang')),
- *   'p' => array(
- *     'children' => array('#PCDATA'),
- *     'attributes' => array('id', 'class', 'style', 'xml:lang', 'align')),
- * ));
- * $parser->parse($source);
- * $result = $parser->dump();
- * // $result is:
- * // <html><body><p align="CENTER">FOO&amp;BAR</p></body></html>
- * </code>
- */
-class HTMLParser
-{
-    /**
-     * @var array
-     * @access protected
-     */
-    var $dtd = array();
-
-    /**
-     * @var array
-     * @access protected
-     */
-    var $construct = array();
-
-    /**
-     * @var string
-     * @access protected
-     */
-    var $current_construct = '';
-
-    /**
-     * @var array
-     * @access protected
-     */
-    var $node = array();
-
-    /**
-     * @var array
-     * @access protected
-     */
-    var $current_node = array();
-
-    /**
-     * @var array
-     * @access protected
-     */
-    var $root = array();
-
-    /**
-     * @var array
-     * @access protected
-     */
-    var $tags_to_save = array();
-
-    /**
-     * @var array
-     * @access protected
-     */
-    var $saved_tags = array();
-
-    /**
-     * @var string
-     * @access protected
-     */
-    var $generic_parent;
-
-    /**
-     * Attributes minimized in HTML.
-     *
-     * @var array
-     * @access protected
-     */
-    var $html_minimized_attributes = array(
-        'checked', 'compact', 'controls', 'declare', 'defer', 'disabled',
-        'ismap', 'mayscript', 'multiple', 'nohref', 'noshade', 'nowrap',
-        'readonly', 'selected', 'utn', 'wrap'
-    );
-
-    /**
-     * @access public
-     */
-    function HTMLParser()
-    {
-        // Old-style constructor (method named after the class); it only
-        // delegates to __construct() so both entry points behave the same.
-        $this->__construct();
-    }
-
-    /**
-     * @access public
-     */
-    function __construct()
-    {
-        // Nothing to initialise here: every property has a default value
-        // in its declaration.
-    }
-
-    /**
-     * @param  string  $data
-     * @access public
-     */
-    function parse($data)
-    {
-        // Build a SAX parser and register this object as the receiver of
-        // its element, character-data and escape events.
-        $sax = new XML_HTMLSax3;
-        $sax->set_object($this);
-        $sax->set_element_handler('openHandler', 'closeHandler');
-        $sax->set_data_handler('dataHandler');
-        $sax->set_escape_handler('escapeHandler');
-        $sax->set_option('XML_OPTION_TRIM_DATA_NODES', 0);
-        // Run the parser; the handlers above accumulate the output.
-        $sax->parse($data);
-    }
-
-    /**
-     * Handling open tags.
-     *
-     * @param  object  $parser
-     * @param  string  $name
-     * @param  array   $attribs
-     * @return void
-     * @access public
-     */
-    function openHandler(&$parser, $name, $attribs)
-    {
-        // Rules are keyed by lowercase element name.
-        $name = strtolower($name);
-        // Do nothing if the element name is not defined.
-        if (isset($this->dtd[$name])) {
-            // A rule may redirect this element to another element name.
-            if (isset($this->dtd[$name]['replace'])) {
-                $name = $this->dtd[$name]['replace'];
-            }
-            // _checkAttributes() is defined outside this excerpt; the element
-            // is dropped entirely when it returns a false value.
-            if ($this->_checkAttributes($name, $attribs)) {
-                if (in_array($name, $this->tags_to_save)) {
-                    $this->_saveTag($name, $attribs);
-                }
-                if (!empty($this->current_node)) {
-                    // A node tree already exists.
-                    if (!isset($this->node[$name])) {
-                        // No separate node list for this name: find a place
-                        // for the element relative to the current node.
-                        $this->_elementLookup($name, $attribs);
-                    } elseif (isset($this->dtd[$name]['type'])
-                        and $this->dtd[$name]['type'] == 'unique'
-                        and !empty($attribs)
-                        and false !== strpos($this->current_construct, "<$name>")) {
-                        // A 'unique' element that was emitted earlier as a bare
-                        // "<name>" now shows up with attributes: rewrite the
-                        // bare open tag so it carries them.
-                        $this->current_construct = str_replace(
-                            "<$name>",
-                            $this->_formatOpenTag($name, $attribs),
-                            $this->current_construct
-                        );
-                    }
-                } elseif ($name != $this->root[0]) {
-                    // Nothing is open yet and the first element is not the
-                    // configured root: supply the root, then place the element.
-                    $this->_appendNode(
-                        $this->root[0],
-                        (isset($this->root[1])? $this->root[1]: array())
-                    );
-                    $this->_elementLookup($name, $attribs);
-                } else {
-                    // The first element is the root itself.
-                    $this->_appendNode($name, $attribs);
-                }
-            }
-        }
-    }
-
-    /**
-     * Handling close tags.
-     *
-     * @param  object  $parser
-     * @param  string  $name
-     * @return void
-     * @access public
-     */
-    function closeHandler(&$parser, $name)
-    {
-        $name = strtolower(trim($name));
-        // Do nothing if the element name is not defined
-        // or the array representing the current node tree is empty.
-        if (isset($this->dtd[$name]) and !empty($this->current_node)) {
-            // Apply the same 'replace' mapping that openHandler() applies.
-            if (isset($this->dtd[$name]['replace'])) {
-                $name = $this->dtd[$name]['replace'];
-            }
-            // Close tags of 'unique' elements are ignored here.
-            if (!isset($this->dtd[$name]['type']) or $this->dtd[$name]['type'] != 'unique') {
-                if ($name == $this->current_node[0]) {
-                    // The element at the front of the list is the one being
-                    // closed: remove it and emit its close tag.
-                    $tagname = array_shift($this->current_node);
-                    $this->current_construct .= "</$tagname>";
-                } elseif ($positions = array_keys($this->current_node, $name)) {
-                    // The element is open further back in the list.
-                    $found = true;
-                    // Elements that sit in front of the first match.
-                    $array = array_slice($this->current_node, 0, $positions[0]);
-                    // Ignore the close tag when the element's default parent
-                    // is among those elements.
-                    if (isset($this->dtd[$name]['default_parent'])
-                        and in_array($this->dtd[$name]['default_parent'], $array)) {
-                        $found = false;
-                    }
-                    if ($found) {
-                        // Close every element up to and including the match.
-                        for ($i = 0; $i <= $positions[0]; $i++) {
-                            $tagname = array_shift($this->current_node);
-                            $this->current_construct .= "</$tagname>";
-                        }
-                    }
-                } else {
-                    // Not open in the current list: look for another node list
-                    // whose front element matches, switch to it and close it.
-                    foreach ($this->node as $key => $value) {
-                        if ($name == $value[0]) {
-                            $this->_switchCurrentNode($key);
-                            $tagname = array_shift($this->current_node);
-                            $this->current_construct .= "</$tagname>";
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    /**
-     * Handling data (#PCDATA) in elements.
-     *
-     * @param  object  $parser
-     * @param  string  $data
-     * @return void
-     * @access public
-     */
-    function dataHandler(&$parser, $data)
-    {
-        // Drop tabs and line breaks surrounding the text.
-        $data = preg_replace('/^[\t\r\n]*(.*)[\t\r\n]*$/', '$1', $data);
-        // Nothing left to store.
-        if (strlen($data) == 0) {
-            return;
-        }
-        // Text arrived before any element was opened: supply the root first.
-        if (empty($this->current_node)) {
-            $root_attribs = isset($this->root[1])? $this->root[1]: array();
-            $this->_appendNode($this->root[0], $root_attribs);
-        }
-        $this->_dataLookup($data);
-    }
-
-    /**
-     * Handling XML escapes (DOCTYPE declaration, Comment and CDATA section).
-     *
-     * @param  object  $parser
-     * @param  string  $data
-     * @return void
-     * @access public
-     */
-    function escapeHandler(&$parser, $data)
-    {
-        // For data that begins with two or more hyphens, replace any run of
-        // trailing hyphens with exactly "--"; other data passes through as is.
-        $normalized = preg_replace('/^(-{2,}.*?)-*$/s', '$1--', $data);
-        // Re-wrap the escape in its "<!" ... ">" delimiters.
-        $this->current_construct .= '<!' . $normalized . '>';
-    }
-
-    /**
-     * Handling processing instructions.
-     *
-     * @param  object  $parser
-     * @param  string  $target
-     * @param  string  $data
-     * @return void
-     * @access public
-     */
-    function piHandler(&$parser, $target, $data)
-    {
-        // No-op. Note that parse() does not register this handler with the
-        // SAX parser in the code visible here.
-    }
-
-    /**
-     * Handling JSP/ASP tags.
-     *
-     * @param  object  $parser
-     * @param  string  $data
-     * @return void
-     * @access public
-     */
-    function jaspHandler(&$parser, $data)
-    {
-        // No-op. Note that parse() does not register this handler with the
-        // SAX parser in the code visible here.
-    }
-
-    /**
-     * @param  array   $rule
-     * @return void
-     * @access public
-     */
-    function setRule($rule)
-    {
-        // Install the rule set that the handlers consult as $this->dtd.
-        // The array is keyed by element name; the handlers read entries such
-        // as 'children', 'type', 'replace', 'default_parent' and
-        // 'default_child' from it.
-        $this->dtd =& $rule;
-    }
-
-    /**
-     * @param  string  $filename
-     * @return void
-     * @access public
-     */
-    function setRuleFile($filename)
-    {
-        // The value returned by the included file becomes the rule set, so
-        // the file is expected to `return` the rule array.
-        // NOTE(review): the file is executed as PHP; never pass a path
-        // derived from untrusted input.
-        $this->dtd = require $filename;
-    }
-
-    /**
-     * @param  string  $encoding
-     * @return string
-     * @access public
-     */
-    function dump($encoding = 'UTF-8')
-    {
-        // Build the output without modifying $this->construct, so dump()
-        // can be called more than once and always returns the same document.
-        // (Previously the root fragment was unset here, so a second call
-        // lost it and raised an undefined-index notice.)
-        $root = isset($this->root[0])? $this->root[0]: null;
-
-        // The root fragment always comes first. If nothing has been parsed
-        // yet, or no root is configured, start from an empty string.
-        $result = '';
-        if (null !== $root and isset($this->construct[$root])) {
-            $result = $this->construct[$root];
-        }
-
-        // Append every other fragment in insertion order, closing whatever
-        // elements are still open in the matching node list.
-        foreach ($this->construct as $key => $fragment) {
-            if (null !== $root and (string) $key === (string) $root) {
-                continue;
-            }
-            $result .= $fragment;
-            if (!empty($this->node[$key])) {
-                foreach ($this->node[$key] as $tagname) {
-                    $result .= "</$tagname>";
-                }
-            }
-        }
-
-        // Finally close the elements still open in the root's node list.
-        if (null !== $root and !empty($this->node[$root])) {
-            foreach ($this->node[$root] as $tagname) {
-                $result .= "</$tagname>";
-            }
-        }
-
-        // The result is treated as UTF-8; convert only when another encoding
-        // is requested and the mbstring function is available.
-        if ($encoding != 'UTF-8'
-            and function_exists('mb_convert_variables')) {
-            mb_convert_variables($encoding, 'UTF-8', $result);
-        }
-        return $result;
-    }
-
-    /**
-     * Specify the root element of the document.
-     * If the name of the first element of the parsed document
-     * does not match the name of the root element you specified,
-     * the root element is automatically supplied.
-     * This will help you when you need a fully XML compliant output
-     * which must have a root element.
-     * Note: Attributes are NOT evaluated.
-     *
-     * Example:
-     * <code>
-     * $parser->setRoot(
-     *   'html',
-     *   array(
-     *     'xmlns' => 'http://www.w3.org/1999/xhtml',
-     *     'xml:lang' => 'ja'
-     *   )
-     * );
-     * </code>
-     *
-     * @param  string  $name
-     * @param  array   $attribs
-     * @return void
-     * @access public
-     */
-    function setRoot($name, $attribs = array())
-    {
-        // Stored as array(name, attributes); the handlers read the name as
-        // $this->root[0] and the attributes as $this->root[1].
-        $this->root = array($name, $attribs);
-    }
-
-    /**
-     * @param  string  $name
-     * @return void
-     * @access public
-     */
-    function setGenericParent($name)
-    {
-        // Fallback parent element that _elementLookup() uses for an element
-        // whose rule defines no 'default_parent'.
-        $this->generic_parent = $name;
-    }
-
-    /**
-     * @param  mixed   Any number of arguments, each a single tag name or an array of tag names
-     * @return void
-     * @access public
-     */
-    function setTagsToSave()
-    {
-        // Each argument is either one tag name or an array of tag names;
-        // normalise it to an array and append it to the list.
-        foreach (func_get_args() as $tag) {
-            $names = is_array($tag)? $tag: array($tag);
-            $this->tags_to_save = array_merge($this->tags_to_save, $names);
-        }
-    }
-
-    /**
-     * @param  string  $encoding
-     * @return array
-     * @access public
-     */
-    function getSavedTags($encoding = 'UTF-8')
-    {
-        // Convert a copy. mb_convert_variables() works by reference, so
-        // converting $this->saved_tags directly would re-encode the stored
-        // data on every call and mix encodings with tags saved afterwards.
-        $tags = $this->saved_tags;
-        if ($encoding != 'UTF-8'
-            and function_exists('mb_convert_variables')) {
-            mb_convert_variables($encoding, 'UTF-8', $tags);
-        }
-        return $tags;
-    }
-
-    /**
-     * @param  string  $name
-     * @param  array   $attribs
-     * @return void
-     * @access protected
-     */
-    function _switchCurrentNode($name, $attribs = array())
-    {
-        // Create the node list for this name on first use.
-        if (!isset($this->node[$name])) {
-            $this->node[$name] = array($name);
-        }
-        // Bind by reference so later changes to current_node are written
-        // to the stored list.
-        $this->current_node =& $this->node[$name];
-        // Create the output fragment on first use, starting with the open tag.
-        if (!isset($this->construct[$name])) {
-            $this->construct[$name] = $this->_formatOpenTag($name, $attribs);
-        }
-        // Bound by reference as well, so appended markup lands in the
-        // stored fragment.
-        $this->current_construct =& $this->construct[$name];
-    }
-
-    /**
-     * @param  string  $name
-     * @param  array   $attribs
-     * @return void
-     * @access protected
-     */
-    function _elementLookup($name, $attribs)
-    {
-        // Finds a valid position for element $name relative to the current
-        // element ($this->current_node[0]) and appends it there. If no
-        // position is found, nothing is appended.
-        if (in_array($name, $this->dtd[$this->current_node[0]]['children'])) {
-            // Case 1: the current element accepts $name as a child.
-            $this->_appendNode($name, $attribs);
-        } elseif (isset($this->dtd[$this->current_node[0]]['ignore_invalid_children'])) {
-            // Case 2: the current element ignores invalid children. The
-            // element is kept only if the current element's default parent
-            // accepts it.
-            // NOTE(review): the meaning of the third argument (1) is defined
-            // by _appendNode(), which is outside this excerpt.
-            if (isset($this->dtd[$this->current_node[0]]['default_parent'])
-                and in_array($name, $this->dtd[$this->dtd[$this->current_node[0]]['default_parent']]['children'])) {
-                $this->_appendNode($name, $attribs, 1);
-            }
-        } elseif (isset($this->dtd[$name]['default_parent'])
-            and isset($this->dtd[$this->current_node[0]]['default_child'])
-            and $this->dtd[$name]['default_parent'] == $this->dtd[$this->current_node[0]]['default_child']) {
-            // Case 3: exactly one element is missing in between. The default
-            // parent of $name is the current element's default child, so
-            // insert it and then append $name.
-            $this->_appendNode($this->dtd[$name]['default_parent']);
-            $this->_appendNode($name, $attribs);
-        } else {
-            $found = false;
-            // Case 4a: follow the chain of default children from the current
-            // element; if the last element of the chain accepts $name,
-            // append the whole chain and then $name.
-            if (!$found and isset($this->dtd[$this->current_node[0]]['default_child'])) {
-                $tagname = $this->dtd[$this->current_node[0]]['default_child'];
-                $array = array($tagname);
-                while (isset($this->dtd[$tagname]['default_child'])) {
-                    $tagname = $this->dtd[$tagname]['default_child'];
-                    $array[] = $tagname;
-                }
-                if (in_array($name, $this->dtd[$array[count($array) -1]]['children'])) {
-                    $found = true;
-                    foreach ($array as $value) {
-                        $this->_appendNode($value);
-                    }
-                    $this->_appendNode($name, $attribs);
-                }
-            }
-            // Case 4b: delegate to _lookupNodeTree(), which is defined
-            // outside this excerpt; a true return value is treated as
-            // "element placed".
-            if (!$found and $this->_lookupNodeTree($name, $attribs)) {
-                $found = true;
-            }
-            // Case 4c: fall back to the element's default parent or, failing
-            // that, to the generic parent set via setGenericParent().
-            if (!$found) {
-                $tagname = isset($this->dtd[$name]['default_parent'])?
-                    $this->dtd[$name]['default_parent']:
-                    (isset($this->generic_parent)? $this->generic_parent: null);
-                if ($tagname) {
-                    if (isset($this->dtd[$tagname]['type']) and $this->dtd[$tagname]['type'] == 'unique') {
-                        // A 'unique' parent has its own node list: switch to
-                        // it and append there.
-                        $this->_switchCurrentNode($tagname);
-                        $this->_appendNode($name, $attribs);
-                    } else {
-                        // Otherwise the parent itself must be placed first.
-                        if ($this->_lookupNodeTree($tagname)) {
-                            $this->_appendNode($name, $attribs);
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    /**
-     * @param  string  $data
-     * @return void
-     * @access protected
-     */
-    function _dataLookup($data)
-    {
-        if (in_array('#PCDATA', $this->dtd[$this->current_node[0]]['children'])) {
-            $this->_appendData($data);
-        } elseif (!preg_match('/^\s*$/s', $data)
-            and !isset($this->dtd[$this->current_node[0]]['ignore_invalid_children'])
-            and isset($this->dtd[$this->current_node[0]]['default_child'])) {
-            $tagname = $this->dtd[$this->current_node[0]]['default_child'];
-            $array = array($tagname);
-            while (isset($this->dtd[$tagname]['default_child'])) {
-                $tagname = $this->dtd[$tagname]['default_child'];
-                $array[] = $tagname;
-            }
-            if (in_array('#PCDATA', $this->dtd[$array[count($array) -1]]['children'])) {
-                foreach ($array as $value) {
-                    $this->_appendNode($value);
-                }
-                $this->_appendData($data);
-            }
-        }
-    }
-
-    /**
-     * @param  string  $name
-     * @param  array   $attribs
-     * @return boolean
-     * @access protected
-     */
-    function _lookupNodeTree($name, $attribs = array())
-    {
-        $limit = count($this->current_node);
-        for ($i = 0; $i < $limit; $i++) {
-            if (in_array($name, $this->dtd[$this->current_node[$i]]['children'])) {
-                if (!isset($this->current_node[$i -1])
-                    or !in_array($this->current_node[$i -1], $this->dtd[$name]['children'])) {
-                    $this->_appendNode($name, $attribs, $i);
-                } else {
-                    $this->_insertNode($name, $attribs, $i);
-                }
-                return true;
-                break;
-            } elseif (
-                (isset($this->dtd[$name]['type'])
-                and $this->dtd[$name]['type'] == 'inline'
-                and isset($this->dtd[$this->current_node[$i]]['type'])
-                and $this->dtd[$this->current_node[$i]]['type'] == 'block')
-                or
-                (isset($this->dtd[$name]['default_parent'])
-                and isset($this->dtd[$this->dtd[$name]['default_parent']]['default_parent'])
-                and $this->current_node[$i] == $this->dtd[$this->dtd[$name]['default_parent']]['default_parent'])
-                ) {
-                return false;
-                break;
-            }
-        }
-        return false;
-    }
-
-    /**
-     * @param  string  $name
-     * @param  array   $attribs
-     * @param  integer $position
-     * @return void
-     * @access protected
-     */
-    function _appendNode($name, $attribs = array(), $position = 0)
-    {
-        if (isset($this->dtd[$name]['type'])
-            and $this->dtd[$name]['type'] == 'unique') {
-            $this->_switchCurrentNode($name, $attribs);
-        } else {
-            while ($position > 0) {
-                $this->current_construct .= '</'.$this->current_node[0].'>';
-                array_shift($this->current_node);
-                $position--;
-            }
-            $this->current_construct .= $this->_formatOpenTag($name, $attribs);
-            // If the element does not have children, it shall be self-closed.
-            // This means that the element name is not added
-            // to the array representing the current node tree.
-            if (!empty($this->dtd[$name]['children'])) {
-                array_unshift($this->current_node, $name);
-            }
-        }
-    }
-
-    /**
-     * @param  string  $name
-     * @param  array   $attribs
-     * @param  integer $position
-     * @return void
-     * @access protected
-     */
-    function _insertNode($name, $attribs = array(), $position = 0)
-    {
-        $tag = $this->current_node[$position -1];
-        preg_match_all("/<$tag\b[^>]*?>/", $this->current_construct, $matches, PREG_OFFSET_CAPTURE);
-        $tags = $matches[0];
-        preg_match_all("/<\/$tag>/", $this->current_construct, $matches, PREG_OFFSET_CAPTURE);
-        foreach ($matches[0] as $value) {
-            $limit = count($tags);
-            for ($i = 0; $i < $limit; $i++) {
-                if ($value[1] > $tags[$i][1]
-                    and (!isset($tags[$i +1][1]) or $value[1] < $tags[$i +1][1])) {
-                    if (!isset($tags[$i][2])) {
-                        $tags[$i][2] = $value[1];
-                    } else {
-                        for ($ii = $i; $ii >= 0; $ii--) {
-                            if (!isset($tags[$ii][2])) {
-                                $tags[$ii][2] = $value[1];
-                                break;
-                            }
-                        }
-                    }
-                    break;
-                }
-            }
-        }
-        foreach ($tags as $key => $value) {
-            if (isset($value[2])) {
-                unset($tags[$key]);
-            }
-        }
-        $tags = array_reverse($tags);
-        $tags_pos = array_keys($this->current_node, $tag);
-        $limit = count($tags_pos);
-        for ($i = 0; $i < $limit; $i++) {
-            if ($tags_pos[$i] == $position -1) {
-                $this->current_construct = substr_replace(
-                    $this->current_construct,
-                    $this->_formatOpenTag($name, $attribs),
-                    $tags[$i][1],
-                    0
-                );
-                array_splice($this->current_node, $position, 0, $name);
-                break;
-            }
-        }
-    }
-
-    /**
-     * @param  string  $data
-     * @return void
-     * @access protected
-     */
-    function _appendData($data)
-    {
-        $this->_escapeChars($data);
-        $this->current_construct .= $data;
-    }
-
-    /**
-     * @param  string  $name
-     * @param  array   $attribs
-     * @return boolean
-     * @access protected
-     */
-    function _checkAttributes($name, &$attribs)
-    {
-        if (!empty($attribs)) {
-            $array = array();
-            foreach ($attribs as $key => $value) {
-                $key = strtolower($key);
-                if (in_array($key, $this->dtd[$name]['attributes'])) {
-                    if (empty($value)
-                        and in_array($key, $this->html_minimized_attributes)) {
-                        $value = $key;
-                    } else {
-                        $this->_escapeChars($value);
-                    }
-                    $array[$key] = $value;
-                }
-            }
-            $attribs = $array;
-        }
-        if (isset($this->dtd[$name]['required_attribute'])
-            and !isset($attribs[$this->dtd[$name]['required_attribute']])) {
-            return false;
-        } else {
-            return true;
-        }
-    }
-
-    /**
-     * @param  string  $name
-     * @param  array   $attribs
-     * @return string
-     * @access protected
-     */
-    function _formatOpenTag($name, $attribs = array())
-    {
-        $tag = "<$name";
-        if (!empty($attribs)) {
-            foreach ($attribs as $key => $value) {
-                $tag .= " $key=\"$value\"";
-            }
-        }
-        $tag .= empty($this->dtd[$name]['children'])? ' />': '>';
-        return $tag;
-    }
-
-    /**
-     * @param  string  $string
-     * @return void
-     * @access protected
-     */
-    function _escapeChars(&$string)
-    {
-        $string = preg_replace('/&(?!(?:[a-zA-Z]+|#[0-9]+|#x[0-9a-fA-F]+);)/', '&amp;', $string);
-        $string = str_replace('<', '&lt;', $string);
-        $string = str_replace('>', '&gt;', $string);
-        $string = str_replace('"', '&quot;', $string);
-    }
-
-    /**
-     * @param  string  $name
-     * @param  array   $attribs
-     * @return void
-     * @access protected
-     */
-    function _saveTag($name, $attribs)
-    {
-        if (!empty($attribs)) {
-            $this->saved_tags[$name][] = $attribs;
-        }
-    }
-}
-
-?>

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/xhtml1-transitional_dtd.inc.php

@@ -1,468 +0,0 @@
-<?php
-return array(
-	'html' => array(
-		'type' => 'unique',
-		'children' => array('head', 'body'),
-		'attributes' => array('lang', 'xml:lang', 'dir', 'id', 'xmlns'),
-		'default_child' => 'body',
-	),
-	'head' => array(
-		'type' => 'unique',
-		'children' => array('script', 'style', 'meta', 'link', 'object', 'isindex', 'title', 'base'),
-		'attributes' => array('lang', 'xml:lang', 'dir', 'id', 'profile'),
-		'default_parent' => 'html',
-	),
-	'title' => array(
-		'children' => array('#PCDATA'),
-		'attributes' => array('lang', 'xml:lang', 'dir', 'id'),
-		'default_parent' => 'head',
-	),
-	'base' => array(
-		'children' => array(),
-		'attributes' => array('id', 'href', 'target'),
-		'default_parent' => 'head',
-	),
-	'meta' => array(
-		'children' => array(),
-		'attributes' => array('lang', 'xml:lang', 'dir', 'id', 'http-equiv', 'name', 'content', 'scheme'),
-		'default_parent' => 'head',
-	),
-	'link' => array(
-		'children' => array(),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'charset', 'href', 'hreflang', 'type', 'rel', 'rev', 'media', 'target'),
-		'default_parent' => 'head',
-	),
-	'style' => array(
-		'children' => array('#PCDATA'),
-		'attributes' => array('lang', 'xml:lang', 'dir', 'id', 'type', 'media', 'title', 'xml:space'),
-		'default_parent' => 'head',
-	),
-	'script' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA'),
-		'attributes' => array('id', 'charset', 'type', 'language', 'src', 'defer', 'xml:space'),
-		'default_parent' => 'head',
-	),
-	'noscript' => array(
-		'type' => 'block',
-		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'iframe' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'longdesc', 'name', 'src', 'frameborder', 'marginwidth', 'marginheight', 'scrolling', 'align', 'height', 'width'),
-	),
-	'noframes' => array(
-		'type' => 'block',
-		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'body' => array(
-		'type' => 'unique',
-		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'onload', 'onunload', 'background', 'bgcolor', 'text', 'link', 'vlink', 'alink'),
-		'default_parent' => 'html',
-	),
-	'div' => array(
-		'type' => 'block',
-		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align'),
-	),
-	'p' => array(
-		'type' => 'block',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align'),
-	),
-	'h1' => array(
-		'type' => 'block',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align'),
-	),
-	'h2' => array(
-		'type' => 'block',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align'),
-	),
-	'h3' => array(
-		'type' => 'block',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align'),
-	),
-	'h4' => array(
-		'type' => 'block',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align'),
-	),
-	'h5' => array(
-		'type' => 'block',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align'),
-	),
-	'h6' => array(
-		'type' => 'block',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align'),
-	),
-	'ul' => array(
-		'type' => 'block',
-		'children' => array('li'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'type', 'compact'),
-		'default_child' => 'li',
-	),
-	'ol' => array(
-		'type' => 'block',
-		'children' => array('li'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'type', 'compact', 'start'),
-		'default_child' => 'li',
-	),
-	'menu' => array(
-		'type' => 'block',
-		'children' => array('li'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'compact'),
-		'default_child' => 'li',
-	),
-	'dir' => array(
-		'type' => 'block',
-		'children' => array('li'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'compact'),
-		'default_child' => 'li',
-	),
-	'li' => array(
-		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'type', 'value'),
-		'default_parent' => 'ul',
-	),
-	'dl' => array(
-		'type' => 'block',
-		'children' => array('dt', 'dd'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'compact'),
-		'default_child' => 'dt',
-	),
-	'dt' => array(
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-		'default_parent' => 'dl',
-	),
-	'dd' => array(
-		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-		'default_parent' => 'dl',
-	),
-	'address' => array(
-		'type' => 'block',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script', 'p'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'hr' => array(
-		'type' => 'block',
-		'children' => array(),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align', 'noshade', 'size', 'width'),
-	),
-	'pre' => array(
-		'type' => 'block',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'tt', 'i', 'b', 'u', 's', 'strike', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'width', 'xml:space'),
-	),
-	'blockquote' => array(
-		'type' => 'block',
-		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'cite'),
-	),
-	'center' => array(
-		'type' => 'block',
-		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'ins' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'cite', 'datetime'),
-	),
-	'del' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'cite', 'datetime'),
-	),
-	'a' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'accesskey', 'tabindex', 'onfocus', 'onblur', 'charset', 'type', 'name', 'href', 'hreflang', 'rel', 'rev', 'shape', 'coords', 'target'),
-		'required_attribute' => 'href'
-	),
-	'span' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'bdo' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'lang', 'xml:lang', 'dir'),
-	),
-	'br' => array(
-		'type' => 'inline',
-		'children' => array(),
-		'attributes' => array('id', 'class', 'style', 'title', 'clear'),
-	),
-	'em' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'strong' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'dfn' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'code' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'samp' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'kbd' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'var' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'cite' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'abbr' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'acronym' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'q' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'cite'),
-	),
-	'sub' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'sup' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'tt' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'i' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'b' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'big' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'small' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'u' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	's' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'strike' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'basefont' => array(
-		'type' => 'inline',
-		'children' => array(),
-		'attributes' => array('id', 'size', 'color', 'face'),
-	),
-	'font' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'size', 'color', 'face'),
-	),
-	'object' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'param', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'declare', 'classid', 'codebase', 'data', 'type', 'codetype', 'archive', 'standby', 'height', 'width', 'usemap', 'name', 'tabindex', 'align', 'border', 'hspace', 'vspace'),
-	),
-	'param' => array(
-		'children' => array(),
-		'attributes' => array('id', 'name', 'value', 'valuetype', 'type'),
-	),
-	'applet' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'param', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'codebase', 'archive', 'code', 'object', 'alt', 'name', 'width', 'height', 'align', 'hspace', 'vspace'),
-	),
-	'img' => array(
-		'type' => 'inline',
-		'children' => array(),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'src', 'alt', 'name', 'longdesc', 'height', 'width', 'usemap', 'ismap', 'align', 'border', 'hspace', 'vspace'),
-		'required_attribute' => 'src'
-	),
-	'map' => array(
-		'type' => 'inline',
-		'children' => array('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'noscript', 'ins', 'del', 'script', 'area'),
-		'attributes' => array('lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'id', 'class', 'style', 'title', 'name'),
-	),
-	'area' => array(
-		'children' => array(),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'accesskey', 'tabindex', 'onfocus', 'onblur', 'shape', 'coords', 'href', 'nohref', 'alt', 'target'),
-		'default_parent' => 'map',
-		'required_attribute' => 'href'
-	),
-	'form' => array(
-		'type' => 'block',
-		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'action', 'method', 'name', 'enctype', 'onsubmit', 'onreset', 'accept', 'accept-charset', 'target'),
-	),
-	'label' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'for', 'accesskey', 'onfocus', 'onblur'),
-	),
-	'input' => array(
-		'type' => 'inline',
-		'children' => array(),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'accesskey', 'tabindex', 'onfocus', 'onblur', 'type', 'name', 'value', 'checked', 'disabled', 'readonly', 'size', 'maxlength', 'src', 'alt', 'usemap', 'onselect', 'onchange', 'accept', 'align'),
-	),
-	'select' => array(
-		'type' => 'inline',
-		'children' => array('optgroup', 'option'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'name', 'size', 'multiple', 'disabled', 'tabindex', 'onfocus', 'onblur', 'onchange'),
-		'ignore_invalid_children' => TRUE,
-		'default_child' => 'option',
-	),
-	'optgroup' => array(
-		'children' => array('option'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'disabled', 'label'),
-		'ignore_invalid_children' => TRUE,
-		'default_child' => 'option',
-	),
-	'option' => array(
-		'children' => array('#PCDATA'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'selected', 'disabled', 'label', 'value'),
-		'ignore_invalid_children' => TRUE,
-		'default_parent' => 'select',
-	),
-	'textarea' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'accesskey', 'tabindex', 'onfocus', 'onblur', 'name', 'rows', 'cols', 'disabled', 'readonly', 'onselect', 'onchange'),
-	),
-	'fieldset' => array(
-		'type' => 'block',
-		'children' => array('#PCDATA', 'legend', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup'),
-	),
-	'legend' => array(
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'accesskey', 'align'),
-	),
-	'button' => array(
-		'type' => 'inline',
-		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'table', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'noscript', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'accesskey', 'tabindex', 'onfocus', 'onblur', 'name', 'value', 'type', 'disabled'),
-	),
-	'isindex' => array(
-		'type' => 'block',
-		'children' => array(),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'prompt'),
-	),
-	'table' => array(
-		'type' => 'block',
-		'children' => array('caption', 'col', 'colgroup', 'thead', 'tfoot', 'tbody', 'tr'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'summary', 'width', 'border', 'frame', 'rules', 'cellspacing', 'cellpadding', 'align', 'bgcolor'),
-		'default_child' => 'tr',
-	),
-	'caption' => array(
-		'children' => array('#PCDATA', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align'),
-	),
-	'thead' => array(
-		'children' => array('tr'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align', 'char', 'charoff', 'valign'),
-		'default_parent' => 'table',
-		'default_child' => 'tr',
-	),
-	'tfoot' => array(
-		'children' => array('tr'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align', 'char', 'charoff', 'valign'),
-		'default_parent' => 'table',
-		'default_child' => 'tr',
-	),
-	'tbody' => array(
-		'children' => array('tr'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align', 'char', 'charoff', 'valign'),
-		'default_parent' => 'table',
-		'default_child' => 'tr',
-	),
-	'colgroup' => array(
-		'children' => array('col'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'span', 'width', 'align', 'char', 'charoff', 'valign'),
-		'default_parent' => 'table',
-		'default_child' => 'col',
-	),
-	'col' => array(
-		'children' => array(),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'span', 'width', 'align', 'char', 'charoff', 'valign'),
-		'default_parent' => 'colgroup',
-	),
-	'tr' => array(
-		'children' => array('th', 'td'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'align', 'char', 'charoff', 'valign', 'bgcolor'),
-		'default_parent' => 'table',
-		'default_child' => 'td',
-	),
-	'th' => array(
-		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'abbr', 'axis', 'headers', 'scope', 'rowspan', 'colspan', 'align', 'char', 'charoff', 'valign', 'nowrap', 'bgcolor', 'width', 'height'),
-		'default_parent' => 'tr',
-	),
-	'td' => array(
-		'children' => array('#PCDATA', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'ul', 'ol', 'dl', 'menu', 'dir', 'pre', 'hr', 'blockquote', 'address', 'center', 'noframes', 'isindex', 'fieldset', 'table', 'form', 'a', 'br', 'span', 'bdo', 'object', 'applet', 'img', 'map', 'iframe', 'tt', 'i', 'b', 'u', 's', 'strike', 'big', 'small', 'font', 'basefont', 'em', 'strong', 'dfn', 'code', 'q', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'sub', 'sup', 'input', 'select', 'textarea', 'label', 'button', 'noscript', 'ins', 'del', 'script'),
-		'attributes' => array('id', 'class', 'style', 'title', 'lang', 'xml:lang', 'dir', 'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup', 'abbr', 'axis', 'headers', 'scope', 'rowspan', 'colspan', 'align', 'char', 'charoff', 'valign', 'nowrap', 'bgcolor', 'width', 'height'),
-		'default_parent' => 'tr',
-	),
-);
-?>

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Uri/Exception.php

@@ -0,0 +1,37 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Uri
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: Exception.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Exception
+ */
+require_once 'Zend/Exception.php';
+
+/**
+ * Exceptions for Zend_Uri
+ *
+ * This class declares no members of its own (the body is empty), so all of
+ * its behaviour is inherited from Zend_Exception. It exists as a distinct
+ * type so that code can catch Zend_Uri_Exception separately from other
+ * Zend_Exception subclasses.
+ *
+ * @category  Zend
+ * @package   Zend_Uri
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Uri_Exception extends Zend_Exception
+{
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Uri/Http.php

@@ -0,0 +1,769 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Uri
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: Http.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Uri
+ */
+require_once 'Zend/Uri.php';
+
+/**
+ * @see Zend_Validate_Hostname
+ */
+require_once 'Zend/Validate/Hostname.php';
+
+/**
+ * HTTP(S) URI handler
+ *
+ * @category  Zend
+ * @package   Zend_Uri
+ * @uses      Zend_Uri
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Uri_Http extends Zend_Uri
+{
+    /**
+     * Character classes for validation regular expressions
+     */
+    const CHAR_ALNUM    = 'A-Za-z0-9';
+    const CHAR_MARK     = '-_.!~*\'()\[\]';
+    const CHAR_RESERVED = ';\/?:@&=+$,';
+    const CHAR_SEGMENT  = ':@&=+$,;';
+    const CHAR_UNWISE   = '{}|\\\\^`';
+
+    /**
+     * HTTP username
+     *
+     * @var string
+     */
+    protected $_username = '';
+
+    /**
+     * HTTP password
+     *
+     * @var string
+     */
+    protected $_password = '';
+
+    /**
+     * HTTP host
+     *
+     * @var string
+     */
+    protected $_host = '';
+
+    /**
+     * HTTP port
+     *
+     * @var string
+     */
+    protected $_port = '';
+
+    /**
+     * HTTP path
+     *
+     * @var string
+     */
+    protected $_path = '';
+
+    /**
+     * HTTP query
+     *
+     * @var string
+     */
+    protected $_query = '';
+
+    /**
+     * HTTP fragment
+     *
+     * @var string
+     */
+    protected $_fragment = '';
+
+    /**
+     * Regular expression grammar rules for validation; values added by constructor
+     *
+     * @var array
+     */
+    protected $_regex = array();
+
+    /**
+     * Sets the scheme, builds the regular-expression grammar used by the
+     * validate*() methods and, if a scheme-specific part is supplied, parses
+     * and validates it.
+     *
+     * @param  string $scheme         The scheme of the URI (e.g., http, https)
+     * @param  string $schemeSpecific The scheme-specific part of the URI
+     *                                (e.g., //example.com/path/to/resource?query=param#fragment)
+     * @throws Zend_Uri_Exception When the URI is not valid
+     */
+    protected function __construct($scheme, $schemeSpecific = '')
+    {
+        $this->_scheme = $scheme;
+
+        // Building blocks for the grammar. Every rule is meant to be embedded
+        // in a slash-delimited regular expression.
+        $escaped = '%[[:xdigit:]]{2}';                  // e.g. '%25' for '%'
+        $baseSet = self::CHAR_ALNUM . self::CHAR_MARK;  // unreserved characters
+        $segment = '(?:' . $escaped . '|[' . $baseSet . self::CHAR_SEGMENT . '])*';
+
+        $this->_regex['escaped']    = $escaped;
+        $this->_regex['unreserved'] = '[' . $baseSet . ']';
+
+        // A segment may contain escaped, unreserved or a few additional characters
+        $this->_regex['segment']    = $segment;
+
+        // A path is a series of segments separated by '/'
+        $this->_regex['path']       = '(?:\/(?:' . $segment . ')?)+';
+
+        // URI characters are escaped, alphanumeric, mark or reserved characters;
+        // unwise characters join the class only when the configuration allows them
+        $unwise = self::$_config['allow_unwise'] ? self::CHAR_UNWISE : '';
+        $this->_regex['uric']       = '(?:' . $escaped . '|[' . $baseSet . self::CHAR_RESERVED . $unwise . '])';
+
+        // Without a scheme-specific part the caller is creating a new URI
+        // with this object, so there is nothing to parse or validate yet.
+        if (strlen($schemeSpecific) === 0) {
+            return;
+        }
+
+        // Split the scheme-specific part into the instance variables ...
+        $this->_parseUri($schemeSpecific);
+
+        // ... and reject the URI if any of them is invalid
+        if ($this->valid() === false) {
+            require_once 'Zend/Uri/Exception.php';
+            throw new Zend_Uri_Exception('Invalid URI supplied');
+        }
+    }
+
+    /**
+     * Creates a Zend_Uri_Http from the given string
+     *
+     * The string is split on its first colon: the part before it is the
+     * scheme (compared case-insensitively), the remainder is handed to the
+     * constructor as the scheme-specific part.
+     *
+     * @param  string $uri String to create URI from, must start with
+     *                     'http://' or 'https://'
+     * @throws Zend_Uri_Exception When the given $uri is not a string, its
+     *                            scheme is neither http nor https, or the
+     *                            URI itself is invalid
+     * @return Zend_Uri_Http
+     */
+    public static function fromString($uri)
+    {
+        if (is_string($uri) === false) {
+            require_once 'Zend/Uri/Exception.php';
+            throw new Zend_Uri_Exception('$uri is not a string');
+        }
+
+        // Limit of 2: colons later in the URI (port, password) stay untouched
+        $parts          = explode(':', $uri, 2);
+        $scheme         = strtolower($parts[0]);
+        $schemeSpecific = isset($parts[1]) === true ? $parts[1] : '';
+
+        // Strict comparison: $scheme is always a string at this point
+        if (in_array($scheme, array('http', 'https'), true) === false) {
+            require_once 'Zend/Uri/Exception.php';
+            throw new Zend_Uri_Exception("Invalid scheme: '$scheme'");
+        }
+
+        $schemeHandler = new Zend_Uri_Http($scheme, $schemeSpecific);
+        return $schemeHandler;
+    }
+
+    /**
+     * Parse the scheme-specific portion of the URI and place its parts into instance variables.
+     *
+     * When a pattern does not match, no exception is thrown here; the
+     * components it would have filled are set to empty strings instead.
+     *
+     * @param  string $schemeSpecific The scheme-specific portion to parse
+     * @throws Zend_Uri_Exception When scheme-specific decomposition fails
+     * @throws Zend_Uri_Exception When authority decomposition fails
+     * @return void
+     */
+    protected function _parseUri($schemeSpecific)
+    {
+        // High-level decomposition: //authority, path, ?query, #fragment
+        $pattern = '~^((//)([^/?#]*))([^?#]*)(\?([^#]*))?(#(.*))?$~';
+        $status  = @preg_match($pattern, $schemeSpecific, $matches);
+        if ($status === false) {
+            require_once 'Zend/Uri/Exception.php';
+            throw new Zend_Uri_Exception('Internal error: scheme-specific decomposition failed');
+        }
+
+        // Save URI components that need no further decomposition. If the
+        // pattern did not match, $matches is empty and each one becomes ''.
+        $this->_path     = isset($matches[4]) === true ? $matches[4] : '';
+        $this->_query    = isset($matches[6]) === true ? $matches[6] : '';
+        $this->_fragment = isset($matches[8]) === true ? $matches[8] : '';
+
+        // Additional decomposition to get username, password, host, and port
+        $combo   = isset($matches[3]) === true ? $matches[3] : '';
+        $pattern = '~^(([^:@]*)(:([^@]*))?@)?([^:]+)(:(.*))?$~';
+        $status  = @preg_match($pattern, $combo, $matches);
+        if ($status === false) {
+            require_once 'Zend/Uri/Exception.php';
+            throw new Zend_Uri_Exception('Internal error: authority decomposition failed');
+        }
+
+        // Save remaining URI components; again '' for anything not matched
+        $this->_username = isset($matches[2]) === true ? $matches[2] : '';
+        $this->_password = isset($matches[4]) === true ? $matches[4] : '';
+        $this->_host     = isset($matches[5]) === true ? $matches[5] : '';
+        $this->_port     = isset($matches[7]) === true ? $matches[7] : '';
+    }
+
+    /**
+     * Assembles the URI string from the instance variables. The parts are
+     * validated first; an exception is thrown if any of them is invalid.
+     *
+     * @throws Zend_Uri_Exception When one or more parts of the URI are invalid
+     * @return string
+     */
+    public function getUri()
+    {
+        if ($this->valid() === false) {
+            require_once 'Zend/Uri/Exception.php';
+            throw new Zend_Uri_Exception('One or more parts of the URI are invalid');
+        }
+
+        $uri = $this->_scheme . '://';
+
+        // user[:password]@ -- a password is only emitted alongside a username
+        if (strlen($this->_username) > 0) {
+            $uri .= $this->_username;
+            if (strlen($this->_password) > 0) {
+                $uri .= ':' . $this->_password;
+            }
+            $uri .= '@';
+        }
+
+        $uri .= $this->_host;
+
+        if (strlen($this->_port) > 0) {
+            $uri .= ':' . $this->_port;
+        }
+
+        $uri .= $this->_path;
+
+        if (strlen($this->_query) > 0) {
+            $uri .= '?' . $this->_query;
+        }
+
+        if (strlen($this->_fragment) > 0) {
+            $uri .= '#' . $this->_fragment;
+        }
+
+        return $uri;
+    }
+
+    /**
+     * Validate the current URI from the instance variables. Returns true if and only if all
+     * parts pass validation.
+     *
+     * Parts are checked in order and checking stops at the first invalid one.
+     *
+     * @return boolean
+     */
+    public function valid()
+    {
+        return $this->validateUsername()
+            && $this->validatePassword()
+            && $this->validateHost()
+            && $this->validatePort()
+            && $this->validatePath()
+            && $this->validateQuery()
+            && $this->validateFragment();
+    }
+
+    /**
+     * Returns the username portion of the URL, or FALSE if none.
+     *
+     * @return string|false
+     */
+    public function getUsername()
+    {
+        if (strlen($this->_username) > 0) {
+            return $this->_username;
+        }
+
+        return false;
+    }
+
+    /**
+     * Returns true if and only if the username passes validation. If no username is passed,
+     * then the username contained in the instance variable is used.
+     *
+     * @param  string $username The HTTP username
+     * @throws Zend_Uri_Exception When username validation fails
+     * @return boolean
+     * @link   http://www.faqs.org/rfcs/rfc2396.html
+     */
+    public function validateUsername($username = null)
+    {
+        // Fall back to the stored username when none is supplied
+        $candidate = ($username === null) ? $this->_username : $username;
+
+        // An empty username is always acceptable
+        if (strlen($candidate) === 0) {
+            return true;
+        }
+
+        // Allowed: escaped octets, alphanumerics, marks and the characters ;:&=+$,
+        $pattern = '/^(?:' . $this->_regex['escaped'] . '|['
+                 . self::CHAR_ALNUM . self::CHAR_MARK . ';:&=+$,' . '])+$/';
+        $matched = @preg_match($pattern, $candidate);
+
+        // false means the regex engine itself failed, not that the input was rejected
+        if ($matched === false) {
+            require_once 'Zend/Uri/Exception.php';
+            throw new Zend_Uri_Exception('Internal error: username validation failed');
+        }
+
+        return $matched === 1;
+    }
+
+    /**
+     * Replaces the username of the current URI after validating it.
+     *
+     * @param  string $username The HTTP username
+     * @throws Zend_Uri_Exception When $username is not a valid HTTP username
+     * @return string The username that was set before this call
+     */
+    public function setUsername($username)
+    {
+        $isValid = $this->validateUsername($username);
+        if ($isValid === false) {
+            require_once 'Zend/Uri/Exception.php';
+            throw new Zend_Uri_Exception("Username \"$username\" is not a valid HTTP username");
+        }
+
+        $previous        = $this->_username;
+        $this->_username = $username;
+
+        return $previous;
+    }
+
+    /**
+     * Returns the password portion of the URL, or FALSE if none.
+     *
+     * @return string|false
+     */
+    public function getPassword()
+    {
+        if (strlen($this->_password) > 0) {
+            return $this->_password;
+        }
+
+        return false;
+    }
+
+    /**
+     * Returns true if and only if the password passes validation. If no password is passed,
+     * then the password contained in the instance variable is used.
+     *
+     * A non-empty password is rejected outright when the instance has no username.
+     *
+     * @param  string $password The HTTP password
+     * @throws Zend_Uri_Exception When password validation fails
+     * @return boolean
+     * @link   http://www.faqs.org/rfcs/rfc2396.html
+     */
+    public function validatePassword($password = null)
+    {
+        // Fall back to the stored password when none is supplied
+        $candidate = ($password === null) ? $this->_password : $password;
+
+        // An empty password is always acceptable
+        if (strlen($candidate) === 0) {
+            return true;
+        }
+
+        // From here on the password is non-empty, which requires a username
+        if (strlen($this->_username) === 0) {
+            return false;
+        }
+
+        // Allowed: escaped octets, alphanumerics, marks and the characters ;:&=+$,
+        $pattern = '/^(?:' . $this->_regex['escaped'] . '|['
+                 . self::CHAR_ALNUM . self::CHAR_MARK . ';:&=+$,' . '])+$/';
+        $matched = @preg_match($pattern, $candidate);
+
+        // false means the regex engine itself failed, not that the input was rejected
+        if ($matched === false) {
+            require_once 'Zend/Uri/Exception.php';
+            throw new Zend_Uri_Exception('Internal error: password validation failed.');
+        }
+
+        return $matched === 1;
+    }
+
+    /**
+     * Replaces the password of the current URI after validating it.
+     *
+     * @param  string $password The HTTP password
+     * @throws Zend_Uri_Exception When $password is not a valid HTTP password
+     * @return string The password that was set before this call
+     */
+    public function setPassword($password)
+    {
+        $isValid = $this->validatePassword($password);
+        if ($isValid === false) {
+            require_once 'Zend/Uri/Exception.php';
+            throw new Zend_Uri_Exception("Password \"$password\" is not a valid HTTP password.");
+        }
+
+        $previous        = $this->_password;
+        $this->_password = $password;
+
+        return $previous;
+    }
+
+    /**
+     * Returns the domain or host IP portion of the URL, or FALSE if none.
+     *
+     * @return string|false
+     */
+    public function getHost()
+    {
+        if (strlen($this->_host) > 0) {
+            return $this->_host;
+        }
+
+        return false;
+    }
+
+    /**
+     * Returns true if and only if the host string passes validation. If no host is passed,
+     * then the host contained in the instance variable is used.
+     *
+     * @param  string $host The HTTP host
+     * @return boolean
+     * @uses   Zend_Validate_Hostname
+     */
+    public function validateHost($host = null)
+    {
+        // Fall back to the stored host when none is supplied
+        $candidate = ($host === null) ? $this->_host : $host;
+
+        // Unlike the other parts, an empty host is never valid
+        if (strlen($candidate) === 0) {
+            return false;
+        }
+
+        // The actual check is delegated to Zend_Validate_Hostname
+        $validator = new Zend_Validate_Hostname(Zend_Validate_Hostname::ALLOW_ALL);
+
+        return $validator->isValid($candidate);
+    }
+
+    /**
+     * Replaces the host of the current URI after validating it.
+     *
+     * @param  string $host The HTTP host
+     * @throws Zend_Uri_Exception When $host is not a valid HTTP host
+     * @return string The host that was set before this call
+     */
+    public function setHost($host)
+    {
+        $isValid = $this->validateHost($host);
+        if ($isValid === false) {
+            require_once 'Zend/Uri/Exception.php';
+            throw new Zend_Uri_Exception("Host \"$host\" is not a valid HTTP host");
+        }
+
+        $previous    = $this->_host;
+        $this->_host = $host;
+
+        return $previous;
+    }
+
+    /**
+     * Returns the TCP port, or FALSE if none.
+     *
+     * @return string|false
+     */
+    public function getPort()
+    {
+        if (strlen($this->_port) > 0) {
+            return $this->_port;
+        }
+
+        return false;
+    }
+
+    /**
+     * Returns true if and only if the TCP port string passes validation. If no port is passed,
+     * then the port contained in the instance variable is used.
+     *
+     * A non-empty port must consist of digits only and lie between 1 and 65535.
+     *
+     * @param  string $port The HTTP port
+     * @return boolean
+     */
+    public function validatePort($port = null)
+    {
+        // Fall back to the stored port when none is supplied
+        $candidate = ($port === null) ? $this->_port : $port;
+
+        // An empty port is always acceptable
+        if (strlen($candidate) === 0) {
+            return true;
+        }
+
+        // Digits only ...
+        if (ctype_digit((string) $candidate) === false) {
+            return false;
+        }
+
+        // ... and within the allowed range
+        return 1 <= $candidate && $candidate <= 65535;
+    }
+
+    /**
+     * Replaces the port of the current URI after validating it.
+     *
+     * @param  string $port The HTTP port
+     * @throws Zend_Uri_Exception When $port is not a valid HTTP port
+     * @return string The port that was set before this call
+     */
+    public function setPort($port)
+    {
+        $isValid = $this->validatePort($port);
+        if ($isValid === false) {
+            require_once 'Zend/Uri/Exception.php';
+            throw new Zend_Uri_Exception("Port \"$port\" is not a valid HTTP port.");
+        }
+
+        $previous    = $this->_port;
+        $this->_port = $port;
+
+        return $previous;
+    }
+
+    /**
+     * Returns the path and filename portion of the URL, or '/' when the
+     * stored path is empty.
+     *
+     * @return string
+     */
+    public function getPath()
+    {
+        if (strlen($this->_path) > 0) {
+            return $this->_path;
+        }
+
+        return '/';
+    }
+
+    /**
+     * Returns true if and only if the path string passes validation. If no path is passed,
+     * then the path contained in the instance variable is used.
+     *
+     * @param  string $path The HTTP path
+     * @throws Zend_Uri_Exception When path validation fails
+     * @return boolean
+     */
+    public function validatePath($path = null)
+    {
+        // Fall back to the stored path when none is supplied
+        $candidate = ($path === null) ? $this->_path : $path;
+
+        // An empty path is always acceptable
+        if (strlen($candidate) === 0) {
+            return true;
+        }
+
+        // The whole string must match the 'path' grammar rule
+        $matched = @preg_match('/^' . $this->_regex['path'] . '$/', $candidate);
+
+        // false means the regex engine itself failed, not that the input was rejected
+        if ($matched === false) {
+            require_once 'Zend/Uri/Exception.php';
+            throw new Zend_Uri_Exception('Internal error: path validation failed');
+        }
+
+        return $matched === 1;
+    }
+
+    /**
+     * Replaces the path of the current URI after validating it.
+     *
+     * @param  string $path The HTTP path
+     * @throws Zend_Uri_Exception When $path is not a valid HTTP path
+     * @return string The path that was set before this call
+     */
+    public function setPath($path)
+    {
+        $isValid = $this->validatePath($path);
+        if ($isValid === false) {
+            require_once 'Zend/Uri/Exception.php';
+            throw new Zend_Uri_Exception("Path \"$path\" is not a valid HTTP path");
+        }
+
+        $previous    = $this->_path;
+        $this->_path = $path;
+
+        return $previous;
+    }
+
+    /**
+     * Returns the query portion of the URL (after ?), or FALSE if none.
+     *
+     * @return string|false
+     */
+    public function getQuery()
+    {
+        if (strlen($this->_query) > 0) {
+            return $this->_query;
+        }
+
+        return false;
+    }
+
+    /**
+     * Returns the query portion of the URL (after ?) as a
+     * key-value-array. If the query is empty an empty array
+     * is returned
+     *
+     * @return array
+     */
+    public function getQueryAsArray()
+    {
+        $params = array();
+        $query  = $this->getQuery();
+
+        // getQuery() reports "no query" as false
+        if ($query === false) {
+            return $params;
+        }
+
+        parse_str($query, $params);
+
+        return $params;
+    }
+
+    /**
+     * Returns true if and only if the query string passes validation. If no query is passed,
+     * then the query string contained in the instance variable is used.
+     *
+     * @param  string $query The query to validate
+     * @throws Zend_Uri_Exception When query validation fails
+     * @return boolean
+     * @link   http://www.faqs.org/rfcs/rfc2396.html
+     */
+    public function validateQuery($query = null)
+    {
+        // Fall back to the stored query when none is supplied
+        $candidate = ($query === null) ? $this->_query : $query;
+
+        // An empty query is always acceptable
+        if (strlen($candidate) === 0) {
+            return true;
+        }
+
+        // Every character must belong to the 'uric' grammar rule
+        $matched = @preg_match('/^' . $this->_regex['uric'] . '*$/', $candidate);
+
+        // false means the regex engine itself failed, not that the input was rejected
+        if ($matched === false) {
+            require_once 'Zend/Uri/Exception.php';
+            throw new Zend_Uri_Exception('Internal error: query validation failed');
+        }
+
+        return $matched === 1;
+    }
+
+    /**
+     * Add or replace params in the query string for the current URI, and
+     * return the old query.
+     *
+     * The given parameters are merged over the current ones with
+     * array_merge(), and the result is stored through setQuery().
+     *
+     * @param  array $queryParams
+     * @return string Old query string
+     */
+    public function addReplaceQueryParameters(array $queryParams)
+    {
+        $merged = array_merge($this->getQueryAsArray(), $queryParams);
+
+        return $this->setQuery($merged);
+    }
+
+    /**
+     * Remove params in the query string for the current URI, and
+     * return the old query.
+     *
+     * @param  array $queryParamKeys Names of the parameters to drop
+     * @return string Old query string
+     */
+    public function removeQueryParameters(array $queryParamKeys)
+    {
+        $current  = $this->getQueryAsArray();
+
+        // array_diff_key() compares keys, so turn the list of names into keys
+        $unwanted = array_fill_keys($queryParamKeys, 0);
+
+        return $this->setQuery(array_diff_key($current, $unwanted));
+    }
+
+    /**
+     * Set the query string for the current URI, and return the old query
+     * string. This method accepts both strings and arrays.
+     *
+     * Passing null, false, an empty string or an empty array clears the
+     * query. A string that does not validate is run through parse_str()
+     * and http_build_query() to encode it before the final validation.
+     *
+     * @param  string|array $query The query string or array
+     * @throws Zend_Uri_Exception When $query is not a valid query string
+     * @return string              Old query string
+     */
+    public function setQuery($query)
+    {
+        $oldQuery = $this->_query;
+
+        // Clear the query for "nothing" values only. empty() is deliberately
+        // not used here: it also treats '0' and 0 as empty, which would
+        // silently discard the perfectly valid query string "0".
+        if ($query === null || $query === false || $query === '' || $query === array()) {
+            $this->_query = '';
+            return $oldQuery;
+        }
+
+        // If query is an array, make a string out of it
+        if (is_array($query) === true) {
+            $query = http_build_query($query, '', '&');
+        } else {
+            // If it is a string, make sure it is valid. If not parse and encode it
+            $query = (string) $query;
+            if ($this->validateQuery($query) === false) {
+                parse_str($query, $queryArray);
+                $query = http_build_query($queryArray, '', '&');
+            }
+        }
+
+        // Make sure the query is valid, and set it
+        if ($this->validateQuery($query) === false) {
+            require_once 'Zend/Uri/Exception.php';
+            throw new Zend_Uri_Exception("'$query' is not a valid query string");
+        }
+
+        $this->_query = $query;
+
+        return $oldQuery;
+    }
+
+    /**
+     * Returns the fragment portion of the URL (after #), or FALSE if none.
+     *
+     * @return string|false
+     */
+    public function getFragment()
+    {
+        if (strlen($this->_fragment) > 0) {
+            return $this->_fragment;
+        }
+
+        return false;
+    }
+
+    /**
+     * Returns true if and only if the fragment passes validation. If no fragment is passed,
+     * then the fragment contained in the instance variable is used.
+     *
+     * @param  string $fragment Fragment of an URI
+     * @throws Zend_Uri_Exception When fragment validation fails
+     * @return boolean
+     * @link   http://www.faqs.org/rfcs/rfc2396.html
+     */
+    public function validateFragment($fragment = null)
+    {
+        // Fall back to the stored fragment when none is supplied
+        $candidate = ($fragment === null) ? $this->_fragment : $fragment;
+
+        // An empty fragment is always acceptable
+        if (strlen($candidate) === 0) {
+            return true;
+        }
+
+        // Every character must belong to the 'uric' grammar rule
+        $matched = @preg_match('/^' . $this->_regex['uric'] . '*$/', $candidate);
+
+        // false means the regex engine itself failed, not that the input was rejected
+        if ($matched === false) {
+            require_once 'Zend/Uri/Exception.php';
+            throw new Zend_Uri_Exception('Internal error: fragment validation failed');
+        }
+
+        return $matched === 1;
+    }
+
+    /**
+     * Replaces the fragment of the current URI after validating it.
+     *
+     * @param  string $fragment Fragment of the current URI
+     * @throws Zend_Uri_Exception When $fragment is not a valid HTTP fragment
+     * @return string The fragment that was set before this call
+     */
+    public function setFragment($fragment)
+    {
+        $isValid = $this->validateFragment($fragment);
+        if ($isValid === false) {
+            require_once 'Zend/Uri/Exception.php';
+            throw new Zend_Uri_Exception("Fragment \"$fragment\" is not a valid HTTP fragment");
+        }
+
+        $previous        = $this->_fragment;
+        $this->_fragment = $fragment;
+
+        return $previous;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Loader/Autoloader/Interface.php

@@ -0,0 +1,43 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Loader
+ * @subpackage Autoloader
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @version    $Id: Interface.php 23775 2011-03-01 17:25:24Z ralph $
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+/**
+ * Autoloader interface
+ *
+ * Declares the single autoload() method that implementing autoloaders
+ * must provide.
+ *
+ * @package    Zend_Loader
+ * @subpackage Autoloader
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+interface Zend_Loader_Autoloader_Interface
+{
+    /**
+     * Autoload a class
+     *
+     * @abstract
+     * @param   string $class Name of the class to load
+     * @return  mixed
+     *          False [if unable to load $class]
+     *          get_class($class) [if $class is successfully loaded]
+     */
+    public function autoload($class);
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Loader/Autoloader/Resource.php

@@ -0,0 +1,467 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Loader
+ * @subpackage Autoloader
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @version    $Id: Resource.php 23775 2011-03-01 17:25:24Z ralph $
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+/** Zend_Loader_Autoloader_Interface */
+require_once 'Zend/Loader/Autoloader/Interface.php';
+
+/**
+ * Resource loader
+ *
+ * @uses       Zend_Loader_Autoloader_Interface
+ * @package    Zend_Loader
+ * @subpackage Autoloader
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Loader_Autoloader_Resource implements Zend_Loader_Autoloader_Interface
+{
+    /**
+     * @var string Base path to resource classes
+     */
+    protected $_basePath;
+
+    /**
+     * @var array Components handled within this resource
+     */
+    protected $_components = array();
+
+    /**
+     * @var string Default resource/component to use when using object registry
+     */
+    protected $_defaultResourceType;
+
+    /**
+     * @var string Namespace of classes within this resource
+     */
+    protected $_namespace;
+
+    /**
+     * @var array Available resource types handled by this resource autoloader
+     */
+    protected $_resourceTypes = array();
+
+    /**
+     * @var array Resource objects already instantiated by load(), keyed by
+     *            fully qualified class name. Declared explicitly so that
+     *            load() does not create the property dynamically.
+     */
+    protected $_resources = array();
+
+    /**
+     * Constructor
+     *
+     * @param  array|Zend_Config $options Configuration options for resource autoloader
+     * @return void
+     */
+    public function __construct($options)
+    {
+        // Zend_Config objects are flattened to an array; anything that is
+        // still not an array afterwards is rejected.
+        $config = ($options instanceof Zend_Config) ? $options->toArray() : $options;
+        if (!is_array($config)) {
+            require_once 'Zend/Loader/Exception.php';
+            throw new Zend_Loader_Exception('Options must be passed to resource loader constructor');
+        }
+
+        $this->setOptions($config);
+
+        // The options must have provided both a namespace and a base path.
+        $prefix = $this->getNamespace();
+        if ((null === $prefix) || (null === $this->getBasePath())) {
+            require_once 'Zend/Loader/Exception.php';
+            throw new Zend_Loader_Exception('Resource loader requires both a namespace and a base path for initialization');
+        }
+
+        // A non-empty namespace is registered with a trailing underscore.
+        if (!empty($prefix)) {
+            $prefix = $prefix . '_';
+        }
+
+        // Put this loader at the front of the autoloader stack for its prefix.
+        require_once 'Zend/Loader/Autoloader.php';
+        Zend_Loader_Autoloader::getInstance()->unshiftAutoloader($this, $prefix);
+    }
+
+    /**
+     * Overloading: methods
+     *
+     * Allow retrieving concrete resource object instances using 'get<Resourcename>()'
+     * syntax. Example:
+     * <code>
+     * $loader = new Zend_Loader_Autoloader_Resource(array(
+     *     'namespace' => 'Stuff_',
+     *     'basePath'  => '/path/to/some/stuff',
+     * ))
+     * $loader->addResourceType('Model', 'models', 'Model');
+     *
+     * $foo = $loader->getModel('Foo'); // get instance of Stuff_Model_Foo class
+     * </code>
+     *
+     * @param  string $method
+     * @param  array $args
+     * @return mixed
+     * @throws Zend_Loader_Exception if method not beginning with 'get' or not matching a valid resource type is called
+     */
+    public function __call($method, $args)
+    {
+        // Only 'get<Resourcename>()' style calls are understood.
+        if ('get' != substr($method, 0, 3)) {
+            require_once 'Zend/Loader/Exception.php';
+            throw new Zend_Loader_Exception("Method '$method' is not supported");
+        }
+
+        // The part after 'get' names the resource type (stored lower-cased).
+        $type = strtolower(substr($method, 3));
+        if (!$this->hasResourceType($type)) {
+            require_once 'Zend/Loader/Exception.php';
+            throw new Zend_Loader_Exception("Invalid resource type $type; cannot load resource");
+        }
+
+        // The first argument is the resource name; it is mandatory.
+        if (empty($args)) {
+            require_once 'Zend/Loader/Exception.php';
+            throw new Zend_Loader_Exception("Cannot load resources; no resource specified");
+        }
+
+        return $this->load(array_shift($args), $type);
+    }
+
+    /**
+     * Helper method to calculate the correct class path
+     *
+     * The class name is split on underscores. The leading segments must equal
+     * the configured namespace (which may itself contain underscores), the
+     * following segments are matched against the registered components, and
+     * whatever remains is mapped onto a file below the longest matching
+     * component's path.
+     *
+     * @param  string $class
+     * @return string|false Path of the class file, or false if the class does
+     *                      not belong to this loader or the file is not readable
+     */
+    public function getClassPath($class)
+    {
+        $segments          = explode('_', $class);
+        $namespaceTopLevel = $this->getNamespace();
+        $namespace         = '';
+
+        if (!empty($namespaceTopLevel)) {
+            // The namespace may span several underscore-separated segments
+            // (e.g. 'My_App'), so take as many segments off the class name as
+            // the namespace contains before comparing.
+            $topLevelCount = count(explode('_', $namespaceTopLevel));
+            $namespace     = implode('_', array_splice($segments, 0, $topLevelCount));
+            if ($namespace != $namespaceTopLevel) {
+                // wrong prefix? we're done
+                return false;
+            }
+        }
+
+        if (count($segments) < 2) {
+            // assumes all resources have a component and class name, minimum
+            return false;
+        }
+
+        // The last segment is the class's own name and never takes part in
+        // component matching.
+        array_pop($segments);
+
+        // Extend the candidate component one segment at a time and remember
+        // the longest one that is registered.
+        $component = $namespace;
+        $lastMatch = false;
+        foreach ($segments as $segment) {
+            $component .= empty($component) ? $segment : '_' . $segment;
+            if (isset($this->_components[$component])) {
+                $lastMatch = $component;
+            }
+        }
+
+        if (!$lastMatch) {
+            return false;
+        }
+
+        // Everything after "<component>_" maps onto the file system.
+        $final     = substr($class, strlen($lastMatch) + 1);
+        $path      = $this->_components[$lastMatch];
+        $classPath = $path . '/' . str_replace('_', '/', $final) . '.php';
+
+        if (Zend_Loader::isReadable($classPath)) {
+            return $classPath;
+        }
+
+        return false;
+    }
+
+    /**
+     * Attempt to autoload a class
+     *
+     * @param  string $class
+     * @return mixed False if not matched, otherwise result if include operation
+     */
+    public function autoload($class)
+    {
+        $file = $this->getClassPath($class);
+
+        // Not one of ours, or the file is not readable.
+        if (false === $file) {
+            return false;
+        }
+
+        return include $file;
+    }
+
+    /**
+     * Set class state from options
+     *
+     * @param  array $options
+     * @return Zend_Loader_Autoloader_Resource
+     */
+    public function setOptions(array $options)
+    {
+        // Set namespace first, see ZF-10836
+        if (isset($options['namespace'])) {
+            $this->setNamespace($options['namespace']);
+            unset($options['namespace']);
+        }
+
+        // Every remaining option is routed to the matching set<Key>() method;
+        // options without such a method are ignored.
+        $available = get_class_methods($this);
+        foreach ($options as $name => $value) {
+            $setter = 'set' . ucfirst($name);
+            if (!in_array($setter, $available)) {
+                continue;
+            }
+            $this->$setter($value);
+        }
+
+        return $this;
+    }
+
+    /**
+     * Set namespace that this autoloader handles
+     *
+     * @param  string $namespace
+     * @return Zend_Loader_Autoloader_Resource
+     */
+    public function setNamespace($namespace)
+    {
+        // Stored without trailing underscores.
+        $normalized       = rtrim((string) $namespace, '_');
+        $this->_namespace = $normalized;
+
+        return $this;
+    }
+
+    /**
+     * Get namespace this autoloader handles
+     *
+     * The value is the one stored by setNamespace(), i.e. without trailing
+     * underscores. It is null until setNamespace() has been called.
+     *
+     * @return string|null
+     */
+    public function getNamespace()
+    {
+        return $this->_namespace;
+    }
+
+    /**
+     * Set base path for this set of resources
+     *
+     * @param  string $path
+     * @return Zend_Loader_Autoloader_Resource
+     */
+    public function setBasePath($path)
+    {
+        // Stored as a string, exactly as given (no trimming or normalisation).
+        $basePath        = (string) $path;
+        $this->_basePath = $basePath;
+
+        return $this;
+    }
+
+    /**
+     * Get base path to this set of resources
+     *
+     * Null until setBasePath() has been called.
+     *
+     * @return string|null
+     */
+    public function getBasePath()
+    {
+        return $this->_basePath;
+    }
+
+    /**
+     * Add resource type
+     *
+     * Registers a new resource type or, when $type is already known, points
+     * it at a new path. The type identifier is lower-cased before use. All
+     * arguments are validated before any internal state is modified, so a
+     * failed call leaves the loader unchanged.
+     *
+     * @param  string $type identifier for the resource type being loaded
+     * @param  string $path path relative to resource base path containing the resource types
+     * @param  null|string $namespace sub-component namespace to append to base namespace that qualifies this resource type;
+     *                                required the first time $type is defined, ignored afterwards
+     * @return Zend_Loader_Autoloader_Resource
+     * @throws Zend_Loader_Exception if a new type is defined without a namespace, or if $path is not a string
+     */
+    public function addResourceType($type, $path, $namespace = null)
+    {
+        $type  = strtolower($type);
+        $isNew = !isset($this->_resourceTypes[$type]);
+
+        // Validate first, mutate afterwards: otherwise a bad $path would leave
+        // behind a half-registered type that has a namespace but no path.
+        if ($isNew && (null === $namespace)) {
+            require_once 'Zend/Loader/Exception.php';
+            throw new Zend_Loader_Exception('Initial definition of a resource type must include a namespace');
+        }
+        if (!is_string($path)) {
+            require_once 'Zend/Loader/Exception.php';
+            throw new Zend_Loader_Exception('Invalid path specification provided; must be string');
+        }
+
+        if ($isNew) {
+            $namespaceTopLevel = $this->getNamespace();
+            $namespace = ucfirst(trim($namespace, '_'));
+            $this->_resourceTypes[$type] = array(
+                'namespace' => empty($namespaceTopLevel) ? $namespace : $namespaceTopLevel . '_' . $namespace,
+            );
+        }
+
+        // Trailing slashes and backslashes are stripped from the relative path.
+        $this->_resourceTypes[$type]['path'] = $this->getBasePath() . '/' . rtrim($path, '\/');
+
+        // Keep the component => path lookup used by getClassPath() in sync.
+        $component = $this->_resourceTypes[$type]['namespace'];
+        $this->_components[$component] = $this->_resourceTypes[$type]['path'];
+        return $this;
+    }
+
+    /**
+     * Add multiple resources at once
+     *
+     * $types should be an associative array of resource type => specification
+     * pairs. Each specification should be an associative array containing
+     * minimally the 'path' key (specifying the path relative to the resource
+     * base path) and optionally the 'namespace' key (indicating the subcomponent
+     * namespace to append to the resource namespace).
+     *
+     * As an example:
+     * <code>
+     * $loader->addResourceTypes(array(
+     *     'model' => array(
+     *         'path'      => 'models',
+     *         'namespace' => 'Model',
+     *     ),
+     *     'form' => array(
+     *         'path'      => 'forms',
+     *         'namespace' => 'Form',
+     *     ),
+     * ));
+     * </code>
+     *
+     * @param  array $types
+     * @return Zend_Loader_Autoloader_Resource
+     */
+    public function addResourceTypes(array $types)
+    {
+        foreach ($types as $type => $spec) {
+            // Each specification must be an array carrying at least 'path'.
+            if (!is_array($spec)) {
+                require_once 'Zend/Loader/Exception.php';
+                throw new Zend_Loader_Exception('addResourceTypes() expects an array of arrays');
+            }
+            if (!isset($spec['path'])) {
+                require_once 'Zend/Loader/Exception.php';
+                throw new Zend_Loader_Exception('addResourceTypes() expects each array to include a paths element');
+            }
+
+            // 'namespace' is optional here; addResourceType() decides whether
+            // a missing one is acceptable.
+            $namespace = isset($spec['namespace']) ? $spec['namespace'] : null;
+            $this->addResourceType($type, $spec['path'], $namespace);
+        }
+
+        return $this;
+    }
+
+    /**
+     * Overwrite existing and set multiple resource types at once
+     *
+     * All previously registered resource types are cleared first; the new
+     * ones are then registered through addResourceTypes(), so each
+     * specification must include a namespace.
+     *
+     * @see    Zend_Loader_Autoloader_Resource::addResourceTypes()
+     * @param  array $types
+     * @return Zend_Loader_Autoloader_Resource
+     */
+    public function setResourceTypes(array $types)
+    {
+        $this->clearResourceTypes();
+        return $this->addResourceTypes($types);
+    }
+
+    /**
+     * Retrieve resource type mappings
+     *
+     * @return array Map of lower-cased type identifier to an array with the
+     *               keys 'namespace' and 'path'
+     */
+    public function getResourceTypes()
+    {
+        return $this->_resourceTypes;
+    }
+
+    /**
+     * Is the requested resource type defined?
+     *
+     * The lookup is case-sensitive. addResourceType() stores types under
+     * their lower-cased identifier, so $type should be passed in lower case.
+     *
+     * @param  string $type
+     * @return bool
+     */
+    public function hasResourceType($type)
+    {
+        return isset($this->_resourceTypes[$type]);
+    }
+
+    /**
+     * Remove the requested resource type
+     *
+     * @param  string $type
+     * @return Zend_Loader_Autoloader_Resource
+     */
+    public function removeResourceType($type)
+    {
+        // Unknown types are ignored.
+        if (!$this->hasResourceType($type)) {
+            return $this;
+        }
+
+        // Drop the component lookup entry together with the type itself.
+        $component = $this->_resourceTypes[$type]['namespace'];
+        unset($this->_components[$component]);
+        unset($this->_resourceTypes[$type]);
+
+        return $this;
+    }
+
+    /**
+     * Clear all resource types
+     *
+     * Resets both the resource type map and the component => path lookup
+     * used by getClassPath().
+     *
+     * @return Zend_Loader_Autoloader_Resource
+     */
+    public function clearResourceTypes()
+    {
+        $this->_resourceTypes = array();
+        $this->_components    = array();
+        return $this;
+    }
+
+    /**
+     * Set default resource type to use when calling load()
+     *
+     * If $type is not a registered resource type the call is silently
+     * ignored and the previous default is kept.
+     *
+     * @param  string $type
+     * @return Zend_Loader_Autoloader_Resource
+     */
+    public function setDefaultResourceType($type)
+    {
+        if ($this->hasResourceType($type)) {
+            $this->_defaultResourceType = $type;
+        }
+        return $this;
+    }
+
+    /**
+     * Get default resource type to use when calling load()
+     *
+     * @return string|null Null when no default has been set
+     */
+    public function getDefaultResourceType()
+    {
+        return $this->_defaultResourceType;
+    }
+
+    /**
+     * Object registry and factory
+     *
+     * Loads the requested resource of type $type (or uses the default resource
+     * type if none provided). If the resource has been loaded previously,
+     * returns the previous instance; otherwise, instantiates it.
+     *
+     * @param  string $resource
+     * @param  string $type
+     * @return object
+     * @throws Zend_Loader_Exception if resource type not specified or invalid
+     */
+    public function load($resource, $type = null)
+    {
+        // Fall back to the default resource type when none was passed.
+        if (null === $type) {
+            $type = $this->getDefaultResourceType();
+            if (empty($type)) {
+                require_once 'Zend/Loader/Exception.php';
+                throw new Zend_Loader_Exception('No resource type specified');
+            }
+        }
+        if (!$this->hasResourceType($type)) {
+            require_once 'Zend/Loader/Exception.php';
+            throw new Zend_Loader_Exception('Invalid resource type specified');
+        }
+
+        // Class name is "<type namespace>_<Resource>".
+        $class = $this->_resourceTypes[$type]['namespace'] . '_' . ucfirst($resource);
+
+        // Reuse the instance created by an earlier call, if any.
+        if (isset($this->_resources[$class])) {
+            return $this->_resources[$class];
+        }
+
+        $this->_resources[$class] = new $class;
+        return $this->_resources[$class];
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Loader/Exception.php

@@ -0,0 +1,35 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Loader
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Exception.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Exception
+ */
+require_once 'Zend/Exception.php';
+
+/**
+ * Exception type thrown by the Zend_Loader classes
+ *
+ * Adds no behaviour of its own on top of Zend_Exception.
+ *
+ * @category   Zend
+ * @package    Zend_Loader
+ * @uses       Zend_Exception
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Loader_Exception extends Zend_Exception
+{}
\ No newline at end of file

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Loader/Autoloader.php

@@ -0,0 +1,582 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Loader
+ * @subpackage Autoloader
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @version    $Id: Autoloader.php 23775 2011-03-01 17:25:24Z ralph $
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+/** Zend_Loader */
+require_once 'Zend/Loader.php';
+
+/**
+ * Autoloader stack and namespace autoloader
+ *
+ * @uses       Zend_Loader_Autoloader
+ * @package    Zend_Loader
+ * @subpackage Autoloader
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Loader_Autoloader
+{
+    /**
+     * @var Zend_Loader_Autoloader Singleton instance
+     */
+    protected static $_instance;
+
+    /**
+     * @var array Concrete autoloader callback implementations
+     */
+    protected $_autoloaders = array();
+
+    /**
+     * @var array Default autoloader callback
+     */
+    protected $_defaultAutoloader = array('Zend_Loader', 'loadClass');
+
+    /**
+     * @var bool Whether or not to act as a fallback autoloader
+     */
+    protected $_fallbackAutoloader = false;
+
+    /**
+     * @var array Callback for internal autoloader implementation
+     */
+    protected $_internalAutoloader;
+
+    /**
+     * @var array Supported namespaces 'Zend' and 'ZendX' by default.
+     */
+    protected $_namespaces = array(
+        'Zend_'  => true,
+        'ZendX_' => true,
+    );
+
+    /**
+     * @var array Namespace-specific autoloaders
+     */
+    protected $_namespaceAutoloaders = array();
+
+    /**
+     * @var bool Whether or not to suppress file not found warnings
+     */
+    protected $_suppressNotFoundWarnings = false;
+
+    /**
+     * @var null|string
+     */
+    protected $_zfPath;
+
+    /**
+     * Retrieve singleton instance
+     *
+     * @return Zend_Loader_Autoloader
+     */
+    public static function getInstance()
+    {
+        // Reuse the existing singleton when there is one.
+        if (null !== self::$_instance) {
+            return self::$_instance;
+        }
+
+        self::$_instance = new self();
+        return self::$_instance;
+    }
+
+    /**
+     * Reset the singleton instance
+     *
+     * Only the static reference is cleared; the callback that the constructor
+     * registered with spl_autoload_register() is not unregistered here.
+     *
+     * @return void
+     */
+    public static function resetInstance()
+    {
+        self::$_instance = null;
+    }
+
+    /**
+     * Autoload a class
+     *
+     * @param  string $class
+     * @return bool
+     */
+    public static function autoload($class)
+    {
+        $self = self::getInstance();
+
+        foreach ($self->getClassAutoloaders($class) as $autoloader) {
+            // Invoke the autoloader according to the form it was registered in.
+            if ($autoloader instanceof Zend_Loader_Autoloader_Interface) {
+                $loaded = $autoloader->autoload($class);
+            } elseif (is_array($autoloader)) {
+                $loaded = call_user_func($autoloader, $class);
+            } elseif (is_string($autoloader) || is_callable($autoloader)) {
+                $loaded = $autoloader($class);
+            } else {
+                // Unsupported entry: skip it.
+                $loaded = false;
+            }
+
+            // The first autoloader reporting success wins.
+            if ($loaded) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Set the default autoloader implementation
+     *
+     * @param  string|array $callback PHP callback
+     * @return Zend_Loader_Autoloader
+     * @throws Zend_Loader_Exception if $callback is not callable
+     */
+    public function setDefaultAutoloader($callback)
+    {
+        if (is_callable($callback)) {
+            $this->_defaultAutoloader = $callback;
+            return $this;
+        }
+
+        throw new Zend_Loader_Exception('Invalid callback specified for default autoloader');
+    }
+
+    /**
+     * Retrieve the default autoloader callback
+     *
+     * Unless changed through setDefaultAutoloader(), this is
+     * array('Zend_Loader', 'loadClass').
+     *
+     * @return string|array PHP Callback
+     */
+    public function getDefaultAutoloader()
+    {
+        return $this->_defaultAutoloader;
+    }
+
+    /**
+     * Set several autoloader callbacks at once
+     *
+     * Replaces the whole global autoloader stack. The per-namespace stacks
+     * are not touched by this method.
+     *
+     * @param  array $autoloaders Array of PHP callbacks (or Zend_Loader_Autoloader_Interface implementations) to act as autoloaders
+     * @return Zend_Loader_Autoloader
+     */
+    public function setAutoloaders(array $autoloaders)
+    {
+        $this->_autoloaders = $autoloaders;
+        return $this;
+    }
+
+    /**
+     * Get attached autoloader implementations
+     *
+     * @return array The global autoloader stack (callbacks and/or
+     *               Zend_Loader_Autoloader_Interface implementations)
+     */
+    public function getAutoloaders()
+    {
+        return $this->_autoloaders;
+    }
+
+    /**
+     * Return all autoloaders for a given namespace
+     *
+     * @param  string $namespace
+     * @return array
+     */
+    public function getNamespaceAutoloaders($namespace)
+    {
+        $key = (string) $namespace;
+
+        // Unknown namespaces simply have no autoloaders.
+        return array_key_exists($key, $this->_namespaceAutoloaders)
+            ? $this->_namespaceAutoloaders[$key]
+            : array();
+    }
+
+    /**
+     * Register a namespace to autoload
+     *
+     * @param  string|array $namespace
+     * @return Zend_Loader_Autoloader
+     */
+    public function registerNamespace($namespace)
+    {
+        // Accept a single namespace or a list of them; nothing else.
+        if (!is_string($namespace) && !is_array($namespace)) {
+            throw new Zend_Loader_Exception('Invalid namespace provided');
+        }
+
+        foreach ((array) $namespace as $prefix) {
+            // Already registered namespaces are left untouched.
+            if (isset($this->_namespaces[$prefix])) {
+                continue;
+            }
+            $this->_namespaces[$prefix] = true;
+        }
+
+        return $this;
+    }
+
+    /**
+     * Unload a registered autoload namespace
+     *
+     * @param  string|array $namespace
+     * @return Zend_Loader_Autoloader
+     */
+    public function unregisterNamespace($namespace)
+    {
+        // Accept a single namespace or a list of them; nothing else.
+        if (!is_string($namespace) && !is_array($namespace)) {
+            throw new Zend_Loader_Exception('Invalid namespace provided');
+        }
+
+        foreach ((array) $namespace as $prefix) {
+            // Namespaces that were never registered are ignored.
+            if (!isset($this->_namespaces[$prefix])) {
+                continue;
+            }
+            unset($this->_namespaces[$prefix]);
+        }
+
+        return $this;
+    }
+
+    /**
+     * Get a list of registered autoload namespaces
+     *
+     * @return array List of namespace prefixes; 'Zend_' and 'ZendX_' are
+     *               registered by default
+     */
+    public function getRegisteredNamespaces()
+    {
+        return array_keys($this->_namespaces);
+    }
+
+    /**
+     * Select a ZF installation and prepend its library path to the include_path
+     *
+     * @param  string|array $spec Directory holding the ZF installations, or an
+     *                            array with a 'path' key and an optional 'version' key
+     * @param  string $version Version to select; replaced by $spec['version'] when that key is set
+     * @return Zend_Loader_Autoloader
+     * @throws Zend_Loader_Exception if $spec is an array without a 'path' key
+     */
+    public function setZfPath($spec, $version = 'latest')
+    {
+        if (is_array($spec)) {
+            if (!isset($spec['path'])) {
+                throw new Zend_Loader_Exception('No path specified for ZF');
+            }
+            $path = $spec['path'];
+            if (isset($spec['version'])) {
+                $version = $spec['version'];
+            }
+        } else {
+            $path = $spec;
+        }
+
+        $this->_zfPath = $this->_getVersionPath($path, $version);
+
+        // The selected installation takes precedence over the existing include_path.
+        set_include_path($this->_zfPath . PATH_SEPARATOR . get_include_path());
+
+        return $this;
+    }
+
+    /**
+     * Get the ZF library path selected by setZfPath()
+     *
+     * @return null|string Null until setZfPath() has been called
+     */
+    public function getZfPath()
+    {
+        return $this->_zfPath;
+    }
+
+    /**
+     * Get or set the value of the "suppress not found warnings" flag
+     *
+     * @param  null|bool $flag
+     * @return bool|Zend_Loader_Autoloader Returns boolean if no argument is passed, object instance otherwise
+     */
+    public function suppressNotFoundWarnings($flag = null)
+    {
+        // Setter mode: any non-null argument is stored as a boolean.
+        if (null !== $flag) {
+            $this->_suppressNotFoundWarnings = (bool) $flag;
+            return $this;
+        }
+
+        // Getter mode.
+        return $this->_suppressNotFoundWarnings;
+    }
+
+    /**
+     * Indicate whether or not this autoloader should be a fallback autoloader
+     *
+     * When enabled, getClassAutoloaders() also offers the internal autoloader
+     * for classes that match no namespace.
+     *
+     * @param  bool $flag
+     * @return Zend_Loader_Autoloader
+     */
+    public function setFallbackAutoloader($flag)
+    {
+        $this->_fallbackAutoloader = (bool) $flag;
+        return $this;
+    }
+
+    /**
+     * Is this instance acting as a fallback autoloader?
+     *
+     * @return bool False unless enabled through setFallbackAutoloader()
+     */
+    public function isFallbackAutoloader()
+    {
+        return $this->_fallbackAutoloader;
+    }
+
+    /**
+     * Get autoloaders to use when matching class
+     *
+     * The returned list is built in this order:
+     *  1. the autoloaders of the first namespace-specific stack whose prefix
+     *     matches $class;
+     *  2. the internal autoloader, if $class matches a registered namespace;
+     *  3. all non-namespaced autoloaders;
+     *  4. the internal autoloader, if no namespace matched and this instance
+     *     acts as a fallback autoloader.
+     *
+     * The lists are combined with array_merge() rather than the array union
+     * operator: the stacks are numerically indexed, so a union would silently
+     * drop every entry whose index is already taken in the result.
+     *
+     * @param  string $class
+     * @return array Array of autoloaders to use
+     */
+    public function getClassAutoloaders($class)
+    {
+        $namespace   = false;
+        $autoloaders = array();
+
+        // Add concrete namespaced autoloaders
+        foreach (array_keys($this->_namespaceAutoloaders) as $ns) {
+            if ('' == $ns) {
+                continue;
+            }
+            if (0 === strpos($class, $ns)) {
+                $namespace   = $ns;
+                $autoloaders = array_merge($autoloaders, $this->getNamespaceAutoloaders($ns));
+                break;
+            }
+        }
+
+        // Add internal namespaced autoloader
+        foreach ($this->getRegisteredNamespaces() as $ns) {
+            if (0 === strpos($class, $ns)) {
+                $namespace     = $ns;
+                $autoloaders[] = $this->_internalAutoloader;
+                break;
+            }
+        }
+
+        // Add non-namespaced autoloaders (appended, never overwritten)
+        $autoloaders = array_merge($autoloaders, $this->getNamespaceAutoloaders(''));
+
+        // Add fallback autoloader
+        if (!$namespace && $this->isFallbackAutoloader()) {
+            $autoloaders[] = $this->_internalAutoloader;
+        }
+
+        return $autoloaders;
+    }
+
+    /**
+     * Add an autoloader to the beginning of the stack
+     *
+     * @param  object|array|string $callback PHP callback or Zend_Loader_Autoloader_Interface implementation
+     * @param  string|array $namespace Specific namespace(s) under which to register callback
+     * @return Zend_Loader_Autoloader
+     */
+    public function unshiftAutoloader($callback, $namespace = '')
+    {
+        // Global stack first.
+        $stack = $this->getAutoloaders();
+        array_unshift($stack, $callback);
+        $this->setAutoloaders($stack);
+
+        // Then the stack of every requested namespace; a single namespace is
+        // treated as a one-element list.
+        foreach ((array) $namespace as $ns) {
+            $nsStack = $this->getNamespaceAutoloaders($ns);
+            array_unshift($nsStack, $callback);
+            $this->_setNamespaceAutoloaders($nsStack, $ns);
+        }
+
+        return $this;
+    }
+
+    /**
+     * Append an autoloader to the autoloader stack
+     *
+     * @param  object|array|string $callback PHP callback or Zend_Loader_Autoloader_Interface implementation
+     * @param  string|array $namespace Specific namespace(s) under which to register callback
+     * @return Zend_Loader_Autoloader
+     */
+    public function pushAutoloader($callback, $namespace = '')
+    {
+        // Global stack first.
+        $stack   = $this->getAutoloaders();
+        $stack[] = $callback;
+        $this->setAutoloaders($stack);
+
+        // Then the stack of every requested namespace; a single namespace is
+        // treated as a one-element list.
+        foreach ((array) $namespace as $ns) {
+            $nsStack   = $this->getNamespaceAutoloaders($ns);
+            $nsStack[] = $callback;
+            $this->_setNamespaceAutoloaders($nsStack, $ns);
+        }
+
+        return $this;
+    }
+
+    /**
+     * Remove an autoloader from the autoloader stack
+     *
+     * @param  object|array|string $callback PHP callback or Zend_Loader_Autoloader_Interface implementation
+     * @param  null|string|array $namespace Specific namespace(s) from which to remove autoloader
+     * @return Zend_Loader_Autoloader
+     */
+    public function removeAutoloader($callback, $namespace = null)
+    {
+        if (null === $namespace) {
+            // No namespace given: drop the callback from the global stack ...
+            $stack    = $this->getAutoloaders();
+            $position = array_search($callback, $stack, true);
+            if (false !== $position) {
+                unset($stack[$position]);
+                $this->setAutoloaders($stack);
+            }
+
+            // ... and from every namespace stack.
+            $targets = array_keys($this->_namespaceAutoloaders);
+        } else {
+            // Otherwise only the named namespace stack(s) are touched; the
+            // global stack is left as it is.
+            $targets = (array) $namespace;
+        }
+
+        foreach ($targets as $ns) {
+            $nsStack  = $this->getNamespaceAutoloaders($ns);
+            $position = array_search($callback, $nsStack, true);
+            if (false !== $position) {
+                unset($nsStack[$position]);
+                $this->_setNamespaceAutoloaders($nsStack, $ns);
+            }
+        }
+
+        return $this;
+    }
+
+    /**
+     * Constructor
+     *
+     * Registers the static autoload() method with the spl_autoload stack and
+     * sets up the callback for the internal autoloader. The constructor is
+     * protected; instances are obtained through getInstance().
+     *
+     * @return void
+     */
+    protected function __construct()
+    {
+        spl_autoload_register(array(__CLASS__, 'autoload'));
+        $this->_internalAutoloader = array($this, '_autoload');
+    }
+
+    /**
+     * Internal autoloader implementation
+     *
+     * Invokes the default autoloader callback for $class. The callback's own
+     * return value is ignored: the call counts as successful unless a
+     * Zend_Exception is thrown.
+     *
+     * @param  string $class
+     * @return string|false $class if the callback returned without throwing
+     *                      a Zend_Exception, false otherwise
+     */
+    protected function _autoload($class)
+    {
+        $callback = $this->getDefaultAutoloader();
+        try {
+            // With warning suppression enabled the callback is invoked under
+            // the @ operator.
+            if ($this->suppressNotFoundWarnings()) {
+                @call_user_func($callback, $class);
+            } else {
+                call_user_func($callback, $class);
+            }
+            return $class;
+        } catch (Zend_Exception $e) {
+            return false;
+        }
+    }
+
+    /**
+     * Set autoloaders for a specific namespace
+     *
+     * Replaces the whole autoloader stack stored for $namespace; the empty
+     * string addresses the non-namespaced stack.
+     *
+     * @param  array $autoloaders
+     * @param  string $namespace
+     * @return Zend_Loader_Autoloader
+     */
+    protected function _setNamespaceAutoloaders(array $autoloaders, $namespace = '')
+    {
+        $namespace = (string) $namespace;
+        $this->_namespaceAutoloaders[$namespace] = $autoloaders;
+        return $this;
+    }
+
+    /**
+     * Retrieve the filesystem path for the requested ZF version
+     *
+     * Of all installations found below $path that match $version, the one
+     * with the highest version number is selected.
+     *
+     * @param  string $path Directory holding the ZF installations
+     * @param  string $version
+     * @return string Library path of the selected installation
+     * @throws Zend_Loader_Exception if no matching installation is found
+     */
+    protected function _getVersionPath($path, $version)
+    {
+        // Normalise any spelling of "latest" to the lower-case keyword.
+        if ('latest' == $this->_getVersionType($version)) {
+            $version = 'latest';
+        }
+
+        $candidates = $this->_getAvailableVersions($path, $version);
+        if (empty($candidates)) {
+            throw new Zend_Loader_Exception('No valid ZF installations discovered');
+        }
+
+        // The candidates are sorted ascending by version, so the last entry
+        // is the highest one.
+        return array_pop($candidates);
+    }
+
+    /**
+     * Retrieve the ZF version type
+     *
+     * @param  string $version
+     * @return string "latest", "major", "minor", or "specific"
+     * @throws Zend_Loader_Exception if version string contains too many dots
+     */
+    protected function _getVersionType($version)
+    {
+        if ('latest' == strtolower($version)) {
+            return 'latest';
+        }
+
+        // The number of dot-separated parts decides the type.
+        $count = count(explode('.', $version));
+        if ($count > 3) {
+            throw new Zend_Loader_Exception('Invalid version string provided');
+        }
+        if (1 == $count) {
+            return 'major';
+        }
+        if (2 == $count) {
+            return 'minor';
+        }
+
+        return 'specific';
+    }
+
+    /**
+     * Get available versions for the version type requested
+     *
+     * Scans the sub-directories of $path for names of the form
+     * "[ZendFramework-]X.Y.Z[suffix][-minimal]" and keeps those whose version
+     * matches $version. $version is either 'latest' (everything matches) or a
+     * version prefix. A prefix only matches on a numeric-component boundary:
+     * '1.1' matches '1.1.0' but not '1.10.0', while '1.10.0' still matches
+     * '1.10.0rc1' and '1.10.0pl1'.
+     *
+     * @param  string $path
+     * @param  string $version
+     * @return array Map of version string to library path, sorted ascending
+     *               with version_compare()
+     * @throws Zend_Loader_Exception if $path is not a directory
+     */
+    protected function _getAvailableVersions($path, $version)
+    {
+        if (!is_dir($path)) {
+            throw new Zend_Loader_Exception('Invalid ZF path provided');
+        }
+
+        $path       = rtrim($path, '/');
+        $path       = rtrim($path, '\\');
+        $versionLen = strlen($version);
+        $versions   = array();
+        $digits     = '0123456789';
+
+        // A prefix can only cut a number in half if it ends in a digit itself
+        // (a prefix such as '1.' already ends on a component boundary).
+        $lastChar           = (string) substr((string) $version, -1);
+        $versionEndsInDigit = ('' !== $lastChar) && (false !== strpos($digits, $lastChar));
+
+        $dirs = glob("$path/*", GLOB_ONLYDIR);
+        foreach ((array) $dirs as $dir) {
+            $dirName = substr($dir, strlen($path) + 1);
+            if (!preg_match('/^(?:ZendFramework-)?(\d+\.\d+\.\d+((a|b|pl|pr|p|rc)\d+)?)(?:-minimal)?$/i', $dirName, $matches)) {
+                continue;
+            }
+
+            $matchedVersion = $matches[1];
+
+            $isMatch = ('latest' == $version);
+            if (!$isMatch
+                && (strlen($matchedVersion) >= $versionLen)
+                && (0 === strpos($matchedVersion, $version))
+            ) {
+                // Reject the match when the prefix ends in the middle of a
+                // number, e.g. '1.1' against '1.10.0'.
+                $nextChar    = (string) substr($matchedVersion, $versionLen, 1);
+                $nextIsDigit = ('' !== $nextChar) && (false !== strpos($digits, $nextChar));
+                $isMatch     = !($versionEndsInDigit && $nextIsDigit);
+            }
+
+            if ($isMatch) {
+                $versions[$matchedVersion] = $dir . '/library';
+            }
+        }
+
+        uksort($versions, 'version_compare');
+        return $versions;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Loader/PluginLoader/Exception.php

@@ -0,0 +1,39 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Loader
+ * @subpackage PluginLoader
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Exception.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Loader_Exception
+ */
+require_once 'Zend/Loader/Exception.php';
+
+/**
+ * Plugin class loader exceptions
+ *
+ * Marker exception type with no members of its own; it only specialises
+ * Zend_Loader_Exception so plugin loader failures can be caught separately.
+ *
+ * @category   Zend
+ * @package    Zend_Loader
+ * @subpackage PluginLoader
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Loader_PluginLoader_Exception extends Zend_Loader_Exception
+{
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Loader/PluginLoader/Interface.php

@@ -0,0 +1,75 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Loader
+ * @subpackage PluginLoader
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Interface.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * Plugin class loader interface
+ *
+ * Contract for loaders that map a short plugin name to a class by searching
+ * a registry of class prefixes and their associated paths.
+ *
+ * @category   Zend
+ * @package    Zend_Loader
+ * @subpackage PluginLoader
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+interface Zend_Loader_PluginLoader_Interface
+{
+    /**
+     * Add prefixed paths to the registry of paths
+     *
+     * @param string $prefix Class name prefix
+     * @param string $path   Path searched for classes with that prefix
+     * @return Zend_Loader_PluginLoader
+     */
+    public function addPrefixPath($prefix, $path);
+
+    /**
+     * Remove a prefix (or prefixed-path) from the registry
+     *
+     * @param string $prefix Class name prefix
+     * @param string $path OPTIONAL Single path to remove; when omitted the
+     *                     whole prefix is removed
+     * @return Zend_Loader_PluginLoader
+     */
+    public function removePrefixPath($prefix, $path = null);
+
+    /**
+     * Whether or not a plugin by a specific name has been loaded
+     *
+     * @param string $name
+     * @return bool
+     */
+    public function isLoaded($name);
+
+    /**
+     * Return full class name for a named plugin
+     *
+     * Zend_Loader_PluginLoader returns false when the named plugin has not
+     * been loaded.
+     *
+     * @param string $name
+     * @return string
+     */
+    public function getClassName($name);
+
+    /**
+     * Load a plugin via the name provided
+     *
+     * @param string $name
+     * @return string Class name of the loaded plugin
+     */
+    public function load($name);
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Loader/PluginLoader.php

@@ -0,0 +1,484 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Loader
+ * @subpackage PluginLoader
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: PluginLoader.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/** Zend_Loader_PluginLoader_Interface */
+require_once 'Zend/Loader/PluginLoader/Interface.php';
+
+/** Zend_Loader */
+require_once 'Zend/Loader.php';
+
+/**
+ * Generic plugin class loader
+ *
+ * @category   Zend
+ * @package    Zend_Loader
+ * @subpackage PluginLoader
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Loader_PluginLoader implements Zend_Loader_PluginLoader_Interface
+{
+    /**
+     * Class map cache file
+     * @var string
+     */
+    protected static $_includeFileCache;
+
+    /**
+     * Instance loaded plugin paths
+     *
+     * @var array
+     */
+    protected $_loadedPluginPaths = array();
+
+    /**
+     * Instance loaded plugins
+     *
+     * @var array
+     */
+    protected $_loadedPlugins = array();
+
+    /**
+     * Instance registry property
+     *
+     * @var array
+     */
+    protected $_prefixToPaths = array();
+
+    /**
+     * Statically loaded plugin path mappings
+     *
+     * @var array
+     */
+    protected static $_staticLoadedPluginPaths = array();
+
+    /**
+     * Statically loaded plugins
+     *
+     * @var array
+     */
+    protected static $_staticLoadedPlugins = array();
+
+    /**
+     * Static registry property
+     *
+     * @var array
+     */
+    protected static $_staticPrefixToPaths = array();
+
+    /**
+     * Whether to use a statically named registry for loading plugins
+     *
+     * @var string|null
+     */
+    protected $_useStaticRegistry = null;
+
+    /**
+     * Constructor
+     *
+     * When a non-empty string is given as $staticRegistryName, this instance
+     * works on the class-wide registry stored under that name instead of its
+     * own instance registry; the shared entries are created on first use.
+     *
+     * @param array $prefixToPaths       Map of prefix => path to register
+     * @param string $staticRegistryName OPTIONAL Name of the shared registry
+     */
+    public function __construct(Array $prefixToPaths = array(), $staticRegistryName = null)
+    {
+        $wantsStaticRegistry = is_string($staticRegistryName) && !empty($staticRegistryName);
+
+        if ($wantsStaticRegistry) {
+            $this->_useStaticRegistry = $staticRegistryName;
+
+            if (!isset(self::$_staticPrefixToPaths[$staticRegistryName])) {
+                self::$_staticPrefixToPaths[$staticRegistryName] = array();
+            }
+
+            if (!isset(self::$_staticLoadedPlugins[$staticRegistryName])) {
+                self::$_staticLoadedPlugins[$staticRegistryName] = array();
+            }
+        }
+
+        // Register the initial paths only after the registry choice is made,
+        // so they end up in the right (static or instance) registry.
+        foreach ($prefixToPaths as $classPrefix => $searchPath) {
+            $this->addPrefixPath($classPrefix, $searchPath);
+        }
+    }
+
+    /**
+     * Format prefix for internal use
+     *
+     * An empty prefix and a prefix ending in a namespace separator ("\") are
+     * returned unchanged; any other prefix is normalised to end in exactly
+     * one underscore.
+     *
+     * @param  string $prefix
+     * @return string
+     */
+    protected function _formatPrefix($prefix)
+    {
+        if($prefix == "") {
+            return $prefix;
+        }
+
+        // substr() instead of the $prefix{...} offset syntax, which is
+        // deprecated as of PHP 7.4 and a parse error as of PHP 8.0.
+        if (substr($prefix, -1) == '\\') {
+            return $prefix;
+        }
+
+        return rtrim($prefix, '_') . '_';
+    }
+
+    /**
+     * Add prefixed paths to the registry of paths
+     *
+     * The prefix is normalised with _formatPrefix() and the path is stored
+     * with exactly one trailing "/". A path already registered for the prefix
+     * is not added again, in both the static and the instance registry.
+     *
+     * @param string $prefix
+     * @param string $path
+     * @return Zend_Loader_PluginLoader
+     * @throws Zend_Loader_PluginLoader_Exception if $prefix or $path is not a string
+     */
+    public function addPrefixPath($prefix, $path)
+    {
+        if (!is_string($prefix) || !is_string($path)) {
+            require_once 'Zend/Loader/PluginLoader/Exception.php';
+            throw new Zend_Loader_PluginLoader_Exception('Zend_Loader_PluginLoader::addPrefixPath() method only takes strings for prefix and path.');
+        }
+
+        $prefix = $this->_formatPrefix($prefix);
+        $path   = rtrim($path, '/\\') . '/';
+
+        // Work on whichever registry this instance uses so that both are
+        // subject to the same duplicate check.
+        if ($this->_useStaticRegistry) {
+            $registry =& self::$_staticPrefixToPaths[$this->_useStaticRegistry];
+        } else {
+            $registry =& $this->_prefixToPaths;
+        }
+
+        if (!isset($registry[$prefix])) {
+            $registry[$prefix] = array();
+        }
+        if (!in_array($path, $registry[$prefix], true)) {
+            $registry[$prefix][] = $path;
+        }
+
+        return $this;
+    }
+
+    /**
+     * Get path stack
+     *
+     * With a string prefix, returns the paths registered for that prefix;
+     * otherwise returns the whole prefix => paths registry in use.
+     *
+     * @param  string $prefix
+     * @return false|array False if prefix does not exist, array otherwise
+     */
+    public function getPaths($prefix = null)
+    {
+        $staticName = $this->_useStaticRegistry;
+
+        // No usable prefix: hand back the complete registry.
+        if (!is_string($prefix)) {
+            return $staticName
+                 ? self::$_staticPrefixToPaths[$staticName]
+                 : $this->_prefixToPaths;
+        }
+
+        $key = $this->_formatPrefix($prefix);
+
+        if ($staticName) {
+            return isset(self::$_staticPrefixToPaths[$staticName][$key])
+                 ? self::$_staticPrefixToPaths[$staticName][$key]
+                 : false;
+        }
+
+        return isset($this->_prefixToPaths[$key])
+             ? $this->_prefixToPaths[$key]
+             : false;
+    }
+
+    /**
+     * Clear path stack
+     *
+     * With a string prefix, removes only that prefix; otherwise empties the
+     * whole registry in use (static or instance).
+     *
+     * @param  string $prefix
+     * @return bool False only if $prefix does not exist
+     */
+    public function clearPaths($prefix = null)
+    {
+        $staticName = $this->_useStaticRegistry;
+
+        // No usable prefix: reset the complete registry.
+        if (!is_string($prefix)) {
+            if ($staticName) {
+                self::$_staticPrefixToPaths[$staticName] = array();
+            } else {
+                $this->_prefixToPaths = array();
+            }
+            return true;
+        }
+
+        $key = $this->_formatPrefix($prefix);
+
+        if ($staticName) {
+            $wasRegistered = isset(self::$_staticPrefixToPaths[$staticName][$key]);
+            if ($wasRegistered) {
+                unset(self::$_staticPrefixToPaths[$staticName][$key]);
+            }
+            return $wasRegistered;
+        }
+
+        $wasRegistered = isset($this->_prefixToPaths[$key]);
+        if ($wasRegistered) {
+            unset($this->_prefixToPaths[$key]);
+        }
+        return $wasRegistered;
+    }
+
+    /**
+     * Remove a prefix (or prefixed-path) from the registry
+     *
+     * When $path is given, only that path is removed from the prefix. The
+     * path is looked up as given and, failing that, in the normalised form
+     * addPrefixPath() stores (single trailing "/"), so a path can be removed
+     * using the same string it was added with.
+     *
+     * @param string $prefix
+     * @param string $path OPTIONAL
+     * @return Zend_Loader_PluginLoader
+     * @throws Zend_Loader_PluginLoader_Exception if the prefix, or the path
+     *         under that prefix, is not registered
+     */
+    public function removePrefixPath($prefix, $path = null)
+    {
+        $prefix = $this->_formatPrefix($prefix);
+        if ($this->_useStaticRegistry) {
+            $registry =& self::$_staticPrefixToPaths[$this->_useStaticRegistry];
+        } else {
+            $registry =& $this->_prefixToPaths;
+        }
+
+        if (!isset($registry[$prefix])) {
+            require_once 'Zend/Loader/PluginLoader/Exception.php';
+            throw new Zend_Loader_PluginLoader_Exception('Prefix ' . $prefix . ' was not found in the PluginLoader.');
+        }
+
+        if ($path != null) {
+            $pos = array_search($path, $registry[$prefix]);
+            if (false === $pos && is_string($path)) {
+                // addPrefixPath() never stores the raw path, so retry with
+                // the same normalisation it applies.
+                $pos = array_search(rtrim($path, '/\\') . '/', $registry[$prefix], true);
+            }
+            if (false === $pos) {
+                require_once 'Zend/Loader/PluginLoader/Exception.php';
+                throw new Zend_Loader_PluginLoader_Exception('Prefix ' . $prefix . ' / Path ' . $path . ' was not found in the PluginLoader.');
+            }
+            unset($registry[$prefix][$pos]);
+        } else {
+            unset($registry[$prefix]);
+        }
+
+        return $this;
+    }
+
+    /**
+     * Normalize plugin name
+     *
+     * Casts the name to a string and upper-cases its first character.
+     *
+     * @param  string $name
+     * @return string
+     */
+    protected function _formatName($name)
+    {
+        $name = (string) $name;
+
+        return ucfirst($name);
+    }
+
+    /**
+     * Whether or not a Plugin by a specific name is loaded
+     *
+     * Consults the static registry when one is in use, the instance registry
+     * otherwise.
+     *
+     * @param string $name
+     * @return bool
+     */
+    public function isLoaded($name)
+    {
+        $key        = $this->_formatName($name);
+        $staticName = $this->_useStaticRegistry;
+
+        return $staticName
+             ? isset(self::$_staticLoadedPlugins[$staticName][$key])
+             : isset($this->_loadedPlugins[$key]);
+    }
+
+    /**
+     * Return full class name for a named plugin
+     *
+     * The static registry (when in use) is checked first, then the instance
+     * registry.
+     *
+     * @param string $name
+     * @return string|false False if class not found, class name otherwise
+     */
+    public function getClassName($name)
+    {
+        $key        = $this->_formatName($name);
+        $staticName = $this->_useStaticRegistry;
+
+        if ($staticName && isset(self::$_staticLoadedPlugins[$staticName][$key])) {
+            return self::$_staticLoadedPlugins[$staticName][$key];
+        }
+
+        return isset($this->_loadedPlugins[$key])
+             ? $this->_loadedPlugins[$key]
+             : false;
+    }
+
+    /**
+     * Get path to plugin class
+     *
+     * Returns a previously remembered path when there is one; otherwise, for
+     * a loaded plugin, asks reflection for the defining file and remembers
+     * the answer in the registry in use.
+     *
+     * @param  mixed $name
+     * @return string|false False if not found
+     */
+    public function getClassPath($name)
+    {
+        $name       = $this->_formatName($name);
+        $staticName = $this->_useStaticRegistry;
+
+        // Remembered paths: static registry first, then instance registry.
+        if ($staticName && !empty(self::$_staticLoadedPluginPaths[$staticName][$name])) {
+            return self::$_staticLoadedPluginPaths[$staticName][$name];
+        }
+        if (!empty($this->_loadedPluginPaths[$name])) {
+            return $this->_loadedPluginPaths[$name];
+        }
+
+        if (!$this->isLoaded($name)) {
+            return false;
+        }
+
+        $reflection = new ReflectionClass($this->getClassName($name));
+        $classPath  = $reflection->getFileName();
+
+        if ($staticName) {
+            self::$_staticLoadedPluginPaths[$staticName][$name] = $classPath;
+        } else {
+            $this->_loadedPluginPaths[$name] = $classPath;
+        }
+
+        return $classPath;
+    }
+
+    /**
+     * Load a plugin via the name provided
+     *
+     * Prefixes are tried in reverse order of registration, and the paths of
+     * each prefix in reverse order as well, so later registrations win. The
+     * first prefix whose class already exists, or whose file defines the
+     * class once included, is used and remembered for subsequent calls.
+     *
+     * @param  string $name
+     * @param  bool $throwExceptions Whether or not to throw exceptions if the
+     * class is not resolved
+     * @return string|false Class name of loaded class; false if
+     * $throwExceptions is false and no class was found
+     * @throws Zend_Loader_PluginLoader_Exception if class not found and
+     * $throwExceptions is true
+     */
+    public function load($name, $throwExceptions = true)
+    {
+        $name = $this->_formatName($name);
+        // Already resolved earlier: reuse the remembered class name.
+        if ($this->isLoaded($name)) {
+            return $this->getClassName($name);
+        }
+
+        if ($this->_useStaticRegistry) {
+            $registry = self::$_staticPrefixToPaths[$this->_useStaticRegistry];
+        } else {
+            $registry = $this->_prefixToPaths;
+        }
+
+        // Most recently registered prefix is searched first.
+        $registry  = array_reverse($registry, true);
+        $found     = false;
+        // Underscores in the plugin name map to directory separators.
+        $classFile = str_replace('_', DIRECTORY_SEPARATOR, $name) . '.php';
+        $incFile   = self::getIncludeFileCache();
+        foreach ($registry as $prefix => $paths) {
+            $className = $prefix . $name;
+
+            // Class is already defined; no autoloading is triggered here.
+            if (class_exists($className, false)) {
+                $found = true;
+                break;
+            }
+
+            // Most recently registered path is searched first.
+            $paths     = array_reverse($paths, true);
+
+            foreach ($paths as $path) {
+                $loadFile = $path . $classFile;
+                if (Zend_Loader::isReadable($loadFile)) {
+                    include_once $loadFile;
+                    // The file only counts if it actually defined the class.
+                    if (class_exists($className, false)) {
+                        // Record the file in the include cache when enabled.
+                        if (null !== $incFile) {
+                            self::_appendIncFile($loadFile);
+                        }
+                        $found = true;
+                        break 2;
+                    }
+                }
+            }
+        }
+
+        if (!$found) {
+            if (!$throwExceptions) {
+                return false;
+            }
+
+            // List every prefix and its paths to make the failure diagnosable.
+            $message = "Plugin by name '$name' was not found in the registry; used paths:";
+            foreach ($registry as $prefix => $paths) {
+                $message .= "\n$prefix: " . implode(PATH_SEPARATOR, $paths);
+            }
+            require_once 'Zend/Loader/PluginLoader/Exception.php';
+            throw new Zend_Loader_PluginLoader_Exception($message);
+       }
+
+        // Remember the resolved class in the registry in use.
+        if ($this->_useStaticRegistry) {
+            self::$_staticLoadedPlugins[$this->_useStaticRegistry][$name]     = $className;
+        } else {
+            $this->_loadedPlugins[$name]     = $className;
+        }
+        return $className;
+    }
+
+    /**
+     * Set path to class file cache
+     *
+     * Specify a path to a file that will add include_once statements for each
+     * plugin class loaded. This is an opt-in feature for performance purposes.
+     * Passing null switches the cache off.
+     *
+     * @param  string $file
+     * @return void
+     * @throws Zend_Loader_PluginLoader_Exception if file is not writeable or path does not exist
+     */
+    public static function setIncludeFileCache($file)
+    {
+        if (null === $file) {
+            self::$_includeFileCache = null;
+            return;
+        }
+
+        if (file_exists($file)) {
+            // Existing cache file: it must be writable itself.
+            if (!is_writable($file)) {
+                require_once 'Zend/Loader/PluginLoader/Exception.php';
+                throw new Zend_Loader_PluginLoader_Exception('Specified file is not writeable (' . $file . ')');
+            }
+        } else {
+            // New cache file: its directory must exist and be writable.
+            $directory = dirname($file);
+            if (!file_exists($directory)) {
+                require_once 'Zend/Loader/PluginLoader/Exception.php';
+                throw new Zend_Loader_PluginLoader_Exception('Specified file does not exist and/or directory does not exist (' . $file . ')');
+            }
+            if (!is_writable($directory)) {
+                require_once 'Zend/Loader/PluginLoader/Exception.php';
+                throw new Zend_Loader_PluginLoader_Exception('Specified file is not writeable (' . $file . ')');
+            }
+        }
+
+        self::$_includeFileCache = $file;
+    }
+
+    /**
+     * Retrieve class file cache path
+     *
+     * @return string|null Path set via setIncludeFileCache(), or null when no
+     *                     cache file is configured
+     */
+    public static function getIncludeFileCache()
+    {
+        return self::$_includeFileCache;
+    }
+
+    /**
+     * Append an include_once statement to the class file cache
+     *
+     * Starts a fresh "<?php" file when the cache does not exist yet, and
+     * writes nothing when the cache already mentions the given file.
+     *
+     * @param  string $incFile Path of the class file to include
+     * @return void
+     */
+    protected static function _appendIncFile($incFile)
+    {
+        $cachePath = self::$_includeFileCache;
+        $contents  = file_exists($cachePath)
+                   ? file_get_contents($cachePath)
+                   : '<?php';
+
+        if (strstr($contents, $incFile)) {
+            return;
+        }
+
+        $contents .= "\ninclude_once '$incFile';";
+        file_put_contents($cachePath, $contents);
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/File/MimeType.php

@@ -0,0 +1,392 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: MimeType.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * Validator for the mime type of a file
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_File_MimeType extends Zend_Validate_Abstract
+{
+    /**#@+
+     * @const Error type constants
+     */
+    const FALSE_TYPE   = 'fileMimeTypeFalse';
+    const NOT_DETECTED = 'fileMimeTypeNotDetected';
+    const NOT_READABLE = 'fileMimeTypeNotReadable';
+    /**#@-*/
+
+    /**
+     * @var array Error message templates
+     */
+    protected $_messageTemplates = array(
+        self::FALSE_TYPE   => "File '%value%' has a false mimetype of '%type%'",
+        self::NOT_DETECTED => "The mimetype of file '%value%' could not be detected",
+        self::NOT_READABLE => "File '%value%' is not readable or does not exist",
+    );
+
+    /**
+     * @var array
+     */
+    protected $_messageVariables = array(
+        'type' => '_type'
+    );
+
+    /**
+     * @var string
+     */
+    protected $_type;
+
+    /**
+     * Mimetypes
+     *
+     * If null, there is no mimetype
+     *
+     * @var string|null
+     */
+    protected $_mimetype;
+
+    /**
+     * Magicfile to use
+     *
+     * @var string|null
+     */
+    protected $_magicfile;
+
+    /**
+     * Finfo object to use
+     *
+     * @var resource
+     */
+    protected $_finfo;
+
+    /**
+     * If no $_ENV['MAGIC'] is set, try and autodiscover it based on common locations
+     * @var array
+     */
+    protected $_magicFiles = array(
+        '/usr/share/misc/magic',
+        '/usr/share/misc/magic.mime',
+        '/usr/share/misc/magic.mgc',
+        '/usr/share/mime/magic',
+        '/usr/share/mime/magic.mime',
+        '/usr/share/mime/magic.mgc',
+        '/usr/share/file/magic',
+        '/usr/share/file/magic.mime',
+        '/usr/share/file/magic.mgc',
+    );
+
+    /**
+     * Option to allow header check
+     *
+     * @var boolean
+     */
+    protected $_headerCheck = false;
+
+    /**
+     * Sets validator options
+     *
+     * Accepts the mimetypes to allow as a comma separated string, an array or
+     * a Zend_Config object. The array keys 'magicfile' and 'headerCheck' are
+     * treated as options and are not added to the mimetype list.
+     *
+     * @param  string|array|Zend_Config $mimetype MimeType
+     * @return void
+     * @throws Zend_Validate_Exception if $mimetype has an unsupported type
+     */
+    public function __construct($mimetype)
+    {
+        if ($mimetype instanceof Zend_Config) {
+            $mimetype = $mimetype->toArray();
+        } elseif (is_string($mimetype)) {
+            $mimetype = explode(',', $mimetype);
+        } elseif (!is_array($mimetype)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception("Invalid options to validator provided");
+        }
+
+        // Option keys and the setter each one is routed to, in the order
+        // they are applied.
+        $optionSetters = array(
+            'magicfile'   => 'setMagicFile',
+            'headerCheck' => 'enableHeaderCheck',
+        );
+        foreach ($optionSetters as $option => $setter) {
+            if (isset($mimetype[$option])) {
+                $this->$setter($mimetype[$option]);
+                unset($mimetype[$option]);
+            }
+        }
+
+        $this->setMimeType($mimetype);
+    }
+
+    /**
+     * Returns the actual set magicfile
+     *
+     * When no magicfile has been set yet, one is looked up first: from
+     * $_ENV['MAGIC'] if present, otherwise (unless safe_mode is enabled) by
+     * trying the common locations listed in $_magicFiles. The result of the
+     * lookup is remembered, so it runs at most once.
+     *
+     * @return string|false Path of the magicfile, or false if none is usable
+     */
+    public function getMagicFile()
+    {
+        if (null === $this->_magicfile) {
+            // ini_get() returns strings ("1", "On", ...), so interpret the
+            // setting as a boolean instead of comparing against int 1.
+            $safeMode = filter_var(@ini_get('safe_mode'), FILTER_VALIDATE_BOOLEAN);
+
+            if (!empty($_ENV['MAGIC'])) {
+                $this->setMagicFile($_ENV['MAGIC']);
+            } elseif (!$safeMode) {
+                require_once 'Zend/Validate/Exception.php';
+                foreach ($this->_magicFiles as $file) {
+                    // suppressing errors which are thrown due to open_basedir restrictions
+                    try {
+                        $this->setMagicFile($file);
+                        if ($this->_magicfile !== null) {
+                            break;
+                        }
+                    } catch (Zend_Validate_Exception $e) {
+                        // Intentionally, catch and fall through
+                    }
+                }
+            }
+
+            // false marks "looked, found nothing" so the lookup is not repeated.
+            if ($this->_magicfile === null) {
+                $this->_magicfile = false;
+            }
+        }
+
+        return $this->_magicfile;
+    }
+
+    /**
+     * Sets the magicfile to use
+     *
+     * An empty value clears the configured magicfile. Otherwise the file
+     * must be readable and accepted by finfo; if it is not, an exception is
+     * thrown and no magicfile is set.
+     *
+     * @param  string $file
+     * @throws Zend_Validate_Exception When finfo can not read the magicfile
+     * @return Zend_Validate_File_MimeType Provides fluid interface
+     */
+    public function setMagicFile($file)
+    {
+        if (empty($file)) {
+            $this->_magicfile = null;
+            return $this;
+        }
+
+        if (!(class_exists('finfo', false))) {
+            $this->_magicfile = null;
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Magicfile can not be set. There is no finfo extension installed');
+        }
+
+        if (!is_file($file) || !is_readable($file)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('The given magicfile can not be read');
+        }
+
+        // FILEINFO_MIME_TYPE is preferred when the constant is defined.
+        $finfoMode    = defined('FILEINFO_MIME_TYPE') ? FILEINFO_MIME_TYPE : FILEINFO_MIME;
+        $this->_finfo = @finfo_open($finfoMode, $file);
+        if (empty($this->_finfo)) {
+            $this->_finfo = null;
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('The given magicfile is not accepted by finfo');
+        }
+
+        $this->_magicfile = $file;
+
+        return $this;
+    }
+
+    /**
+     * Returns the Header Check option
+     *
+     * @return boolean True if isValid() may fall back to the 'type' entry of
+     *                 the file data when no mimetype could be detected
+     */
+    public function getHeaderCheck()
+    {
+        return $this->_headerCheck;
+    }
+
+    /**
+     * Defines if the http header should be used
+     * Note that this is unsafe and therefore the default value is false
+     *
+     * @param  boolean $headerCheck
+     * @return Zend_Validate_File_MimeType Provides fluid interface
+     */
+    public function enableHeaderCheck($headerCheck = true)
+    {
+        $enabled = (bool) $headerCheck;
+        $this->_headerCheck = $enabled;
+
+        return $this;
+    }
+
+    /**
+     * Returns the set mimetypes
+     *
+     * @param  boolean $asArray Returns the values as an array; when false a
+     *                          comma separated string is returned
+     * @return string|array
+     */
+    public function getMimeType($asArray = false)
+    {
+        $list = (string) $this->_mimetype;
+
+        return ((bool) $asArray) ? explode(',', $list) : $list;
+    }
+
+    /**
+     * Sets the mimetypes
+     *
+     * Discards any previously set mimetypes and replaces them with the given
+     * ones.
+     *
+     * @param  string|array $mimetype The mimetypes to validate
+     * @return Zend_Validate_File_MimeType Provides a fluent interface
+     */
+    public function setMimeType($mimetype)
+    {
+        $this->_mimetype = null;
+        $this->addMimeType($mimetype);
+        return $this;
+    }
+
+    /**
+     * Adds the mimetypes
+     *
+     * Accepts a comma separated string or an array. Entries are trimmed,
+     * non-string and empty entries are ignored, and duplicates are dropped.
+     *
+     * @param  string|array $mimetype The mimetypes to add for validation
+     * @return Zend_Validate_File_MimeType Provides a fluent interface
+     * @throws Zend_Validate_Exception if $mimetype is neither string nor array
+     */
+    public function addMimeType($mimetype)
+    {
+        $known = $this->getMimeType(true);
+
+        if (is_string($mimetype)) {
+            $mimetype = explode(',', $mimetype);
+        } elseif (!is_array($mimetype)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception("Invalid options to validator provided");
+        }
+
+        // 'magicfile' is an option key, not a mimetype.
+        unset($mimetype['magicfile']);
+
+        foreach ($mimetype as $candidate) {
+            if (is_string($candidate) && !empty($candidate)) {
+                $known[] = trim($candidate);
+            }
+        }
+
+        // Drop duplicates, then any empty values (e.g. left over from an
+        // empty initial list or from trimming whitespace-only entries).
+        $known = array_filter(array_unique($known));
+
+        $this->_mimetype = implode(',', $known);
+
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if the mimetype of the file matches the given ones. Also parts
+     * of mimetypes can be checked. If you give for example "image" all image
+     * mime types will be accepted like "image/gif", "image/jpeg" and so on.
+     *
+     * The type is detected with finfo when available, then with
+     * mime_content_type() when a mime_magic file is configured, and finally,
+     * only if the header check is enabled, taken from $file['type'].
+     *
+     * @param  string $value Real file to check for mimetype
+     * @param  array  $file  File data from Zend_File_Transfer
+     * @return boolean
+     */
+    public function isValid($value, $file = null)
+    {
+        if ($file === null) {
+            $file = array(
+                'type' => null,
+                'name' => $value
+            );
+        }
+
+        // Forget the type detected for a previously validated file; without
+        // this, a validator reused on a system lacking finfo would keep
+        // reporting the first file's type.
+        $this->_type = null;
+
+        // Is file readable ?
+        require_once 'Zend/Loader.php';
+        if (!Zend_Loader::isReadable($value)) {
+            return $this->_throw($file, self::NOT_READABLE);
+        }
+
+        $mimefile = $this->getMagicFile();
+        if (class_exists('finfo', false)) {
+            $const = defined('FILEINFO_MIME_TYPE') ? FILEINFO_MIME_TYPE : FILEINFO_MIME;
+            if (!empty($mimefile) && empty($this->_finfo)) {
+                $this->_finfo = @finfo_open($const, $mimefile);
+            }
+
+            // Fall back to finfo's default magic database.
+            if (empty($this->_finfo)) {
+                $this->_finfo = @finfo_open($const);
+            }
+
+            if (!empty($this->_finfo)) {
+                $this->_type = finfo_file($this->_finfo, $value);
+            }
+        }
+
+        if (empty($this->_type) &&
+            (function_exists('mime_content_type') && ini_get('mime_magic.magicfile'))) {
+                $this->_type = mime_content_type($value);
+        }
+
+        if (empty($this->_type) && $this->_headerCheck) {
+            $this->_type = $file['type'];
+        }
+
+        if (empty($this->_type)) {
+            return $this->_throw($file, self::NOT_DETECTED);
+        }
+
+        // Exact match against the allowed list.
+        $mimetype = $this->getMimeType(true);
+        if (in_array($this->_type, $mimetype)) {
+            return true;
+        }
+
+        // Partial match: compare the allowed entries against the pieces of
+        // the detected type split on "/", "-" and ";".
+        $types = explode('/', $this->_type);
+        $types = array_merge($types, explode('-', $this->_type));
+        $types = array_merge($types, explode(';', $this->_type));
+        foreach($mimetype as $mime) {
+            if (in_array($mime, $types)) {
+                return true;
+            }
+        }
+
+        return $this->_throw($file, self::FALSE_TYPE);
+    }
+
+    /**
+     * Throws an error of the given type
+     *
+     * Despite the name, no exception is thrown: the file name is stored as
+     * the validated value, the error is recorded via _error(), and false is
+     * returned so callers can write "return $this->_throw(...)".
+     *
+     * @param  array  $file      File data; only the 'name' key is read
+     * @param  string $errorType One of the error type constants
+     * @return false
+     */
+    protected function _throw($file, $errorType)
+    {
+        $this->_value = $file['name'];
+        $this->_error($errorType);
+        return false;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/File/Sha1.php

@@ -0,0 +1,181 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: Sha1.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_File_Hash
+ */
+require_once 'Zend/Validate/File/Hash.php';
+
+/**
+ * Validator for the sha1 hash of given files
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_File_Sha1 extends Zend_Validate_File_Hash
+{
+    /**
+     * @const string Keys identifying the possible validation failures
+     */
+    const DOES_NOT_MATCH = 'fileSha1DoesNotMatch';
+    const NOT_DETECTED   = 'fileSha1NotDetected';
+    const NOT_FOUND      = 'fileSha1NotFound';
+
+    /**
+     * @var array Message templates, keyed by failure constant
+     */
+    protected $_messageTemplates = array(
+        self::DOES_NOT_MATCH => "File '%value%' does not match the given sha1 hashes",
+        self::NOT_DETECTED   => "A sha1 hash could not be evaluated for the given file",
+        self::NOT_FOUND      => "File '%value%' is not readable or does not exist",
+    );
+
+    /**
+     * Accepted hashes; isValid() compares the file hash against the keys
+     *
+     * @var array
+     */
+    protected $_hash;
+
+    /**
+     * Sets validator options
+     *
+     * A scalar is treated as a single accepted hash; an array or Zend_Config
+     * is handed to setHash() as given.
+     *
+     * @param  string|array|Zend_Config $options
+     * @throws Zend_Validate_Exception When $options is of an unsupported type
+     * @return void
+     */
+    public function __construct($options)
+    {
+        if (is_scalar($options)) {
+            $options = array('hash1' => $options);
+        } elseif ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        } elseif (!is_array($options)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Invalid options to validator provided');
+        }
+
+        $this->setHash($options);
+    }
+
+    /**
+     * Returns all set sha1 hashes (alias of getHash())
+     *
+     * @return array
+     */
+    public function getSha1()
+    {
+        return $this->getHash();
+    }
+
+    /**
+     * Sets the sha1 hash for one or multiple files
+     *
+     * The algorithm entry is always forced to 'sha1' before delegating
+     * to the parent implementation.
+     *
+     * @param  string|array $options
+     * @return Zend_Validate_File_Hash Provides a fluent interface
+     */
+    public function setHash($options)
+    {
+        // Casting an array to array is a no-op, so no type check is needed
+        $hashes              = (array) $options;
+        $hashes['algorithm'] = 'sha1';
+        parent::setHash($hashes);
+
+        return $this;
+    }
+
+    /**
+     * Sets the sha1 hash for one or multiple files (alias of setHash())
+     *
+     * @param  string|array $options
+     * @return Zend_Validate_File_Hash Provides a fluent interface
+     */
+    public function setSha1($options)
+    {
+        $this->setHash($options);
+
+        return $this;
+    }
+
+    /**
+     * Adds the sha1 hash for one or multiple files
+     *
+     * The algorithm entry is always forced to 'sha1' before delegating
+     * to the parent implementation.
+     *
+     * @param  string|array $options
+     * @return Zend_Validate_File_Hash Provides a fluent interface
+     */
+    public function addHash($options)
+    {
+        // Casting an array to array is a no-op, so no type check is needed
+        $hashes              = (array) $options;
+        $hashes['algorithm'] = 'sha1';
+        parent::addHash($hashes);
+
+        return $this;
+    }
+
+    /**
+     * Adds the sha1 hash for one or multiple files (alias of addHash())
+     *
+     * @param  string|array $options
+     * @return Zend_Validate_File_Hash Provides a fluent interface
+     */
+    public function addSha1($options)
+    {
+        $this->addHash($options);
+
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if the sha1 hash of the given file is
+     * identical to one of the set hashes
+     *
+     * @param  string $value Filename to check for hash
+     * @param  array  $file  File data from Zend_File_Transfer
+     * @return boolean
+     */
+    public function isValid($value, $file = null)
+    {
+        require_once 'Zend/Loader.php';
+        if (!Zend_Loader::isReadable($value)) {
+            // Unreadable or missing file
+            return $this->_throw($file, self::NOT_FOUND);
+        }
+
+        $accepted = array_unique(array_keys($this->_hash));
+        $actual   = hash_file('sha1', $value);
+        if (false === $actual) {
+            return $this->_throw($file, self::NOT_DETECTED);
+        }
+
+        // Strict comparison against every accepted hash
+        foreach ($accepted as $candidate) {
+            if ($candidate === $actual) {
+                return true;
+            }
+        }
+
+        return $this->_throw($file, self::DOES_NOT_MATCH);
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/File/FilesSize.php

@@ -0,0 +1,164 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: FilesSize.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_File_Size
+ */
+require_once 'Zend/Validate/File/Size.php';
+
+/**
+ * Validator for the size of all files which will be validated in sum
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_File_FilesSize extends Zend_Validate_File_Size
+{
+    /**
+     * @const string Keys identifying the possible validation failures
+     */
+    const TOO_BIG      = 'fileFilesSizeTooBig';
+    const TOO_SMALL    = 'fileFilesSizeTooSmall';
+    const NOT_READABLE = 'fileFilesSizeNotReadable';
+
+    /**
+     * @var array Message templates, keyed by failure constant
+     */
+    protected $_messageTemplates = array(
+        self::TOO_BIG      => "All files in sum should have a maximum size of '%max%' but '%size%' were detected",
+        self::TOO_SMALL    => "All files in sum should have a minimum size of '%min%' but '%size%' were detected",
+        self::NOT_READABLE => "One or more files can not be read",
+    );
+
+    /**
+     * Files already added to the running total, keyed by file name
+     *
+     * @var array
+     */
+    protected $_files;
+
+    /**
+     * Sets validator options
+     *
+     * A scalar is used as the 'max' option; an array may carry the keys
+     * 'min', 'max' and 'bytestring'. When further positional arguments are
+     * passed, the second one overrides 'max' and the third one sets
+     * 'bytestring'.
+     *
+     * @param  integer|array|Zend_Config $options Options for this validator
+     * @throws Zend_Validate_Exception When $options is of an unsupported type
+     * @return void
+     */
+    public function __construct($options)
+    {
+        // Start with an empty file registry and a zero running total
+        $this->_files = array();
+        $this->_setSize(0);
+
+        if (is_scalar($options)) {
+            $options = array('max' => $options);
+        } elseif ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        } elseif (!is_array($options)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Invalid options to validator provided');
+        }
+
+        // Positional arguments following $options
+        $argv  = func_get_args();
+        $extra = array_slice($argv, 1);
+        if (count($extra) > 0) {
+            $options['max'] = $extra[0];
+            if (count($extra) > 1) {
+                $options['bytestring'] = $extra[1];
+            }
+        }
+
+        parent::__construct($options);
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if the disk usage of all files is at least min and
+     * not bigger than max (when max is not null). Each file is counted only once
+     * across calls; the running total is kept in this instance.
+     *
+     * @param  string|array $value Real file to check for size
+     * @param  array        $file  File data from Zend_File_Transfer
+     * @return boolean
+     */
+    public function isValid($value, $file = null)
+    {
+        require_once 'Zend/Loader.php';
+        if (is_string($value)) {
+            $value = array($value);
+        }
+
+        $min   = $this->getMin(true);
+        $max   = $this->getMax(true);
+        $total = $this->_getSize();
+        foreach ($value as $path) {
+            if (!Zend_Loader::isReadable($path)) {
+                // Record the failure but keep summing the remaining files
+                $this->_throw($file, self::NOT_READABLE);
+                continue;
+            }
+
+            if (isset($this->_files[$path])) {
+                // Already part of the total, do not count twice
+                continue;
+            }
+            $this->_files[$path] = $path;
+
+            // limited to 2GB files
+            $total      += @filesize($path);
+            $this->_size = $total;
+            if (($max !== null) && ($max < $total)) {
+                $formatted = $this->useByteString();
+                if ($formatted) {
+                    // Show human readable values while the message is built
+                    $this->_max  = $this->_toByteString($max);
+                    $this->_size = $this->_toByteString($total);
+                }
+
+                $this->_throw($file, self::TOO_BIG);
+
+                if ($formatted) {
+                    // Restore the raw values
+                    $this->_max  = $max;
+                    $this->_size = $total;
+                }
+            }
+        }
+
+        // The sum of all files must reach the minimum size
+        if (($min !== null) && ($total < $min)) {
+            $formatted = $this->useByteString();
+            if ($formatted) {
+                $this->_min  = $this->_toByteString($min);
+                $this->_size = $this->_toByteString($total);
+            }
+
+            $this->_throw($file, self::TOO_SMALL);
+
+            if ($formatted) {
+                $this->_min  = $min;
+                $this->_size = $total;
+            }
+        }
+
+        // Valid only when no failure message has been recorded
+        return count($this->_messages) === 0;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/File/IsCompressed.php

@@ -0,0 +1,149 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: IsCompressed.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_File_MimeType
+ */
+require_once 'Zend/Validate/File/MimeType.php';
+
+/**
+ * Validator which checks if the file is compressed (has an archive/compression mime type)
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_File_IsCompressed extends Zend_Validate_File_MimeType
+{
+    /**
+     * @const string Keys identifying the possible validation failures
+     */
+    const FALSE_TYPE   = 'fileIsCompressedFalseType';
+    const NOT_DETECTED = 'fileIsCompressedNotDetected';
+    const NOT_READABLE = 'fileIsCompressedNotReadable';
+
+    /**
+     * @var array Message templates, keyed by failure constant
+     */
+    protected $_messageTemplates = array(
+        self::FALSE_TYPE   => "File '%value%' is not compressed, '%type%' detected",
+        self::NOT_DETECTED => "The mimetype of file '%value%' could not be detected",
+        self::NOT_READABLE => "File '%value%' is not readable or does not exist",
+    );
+
+    /**
+     * Sets validator options
+     *
+     * When no mime type is given (entries 'magicfile' and 'headerCheck' are
+     * not counted as mime types) the built-in list of archive and
+     * compression mime types is used.
+     *
+     * @param  string|array|Zend_Config $mimetype
+     * @return void
+     */
+    public function __construct($mimetype = array())
+    {
+        if ($mimetype instanceof Zend_Config) {
+            $mimetype = $mimetype->toArray();
+        }
+
+        // http://de.wikipedia.org/wiki/Liste_von_Dateiendungen
+        $default = array(
+            'application/arj',
+            'application/gnutar',
+            'application/lha',
+            'application/lzx',
+            'application/vnd.ms-cab-compressed',
+            'application/x-ace-compressed',
+            'application/x-arc',
+            'application/x-archive',
+            'application/x-arj',
+            'application/x-bzip',
+            'application/x-bzip2',
+            'application/x-cab-compressed',
+            'application/x-compress',
+            'application/x-compressed',
+            'application/x-cpio',
+            'application/x-debian-package',
+            'application/x-eet',
+            'application/x-gzip',
+            'application/x-java-pack200',
+            'application/x-lha',
+            'application/x-lharc',
+            'application/x-lzh',
+            'application/x-lzma',
+            'application/x-lzx',
+            'application/x-rar',
+            'application/x-sit',
+            'application/x-stuffit',
+            'application/x-tar',
+            'application/zip',
+            'application/zoo',
+            'multipart/x-gzip',
+        );
+
+        if (is_array($mimetype)) {
+            // Look at what remains once the non mime type options are removed;
+            // unset() is silent for keys which are not present
+            $remaining = $mimetype;
+            unset($remaining['magicfile'], $remaining['headerCheck']);
+
+            if (empty($remaining)) {
+                // Only options were given: keep them and append the defaults
+                $mimetype += $default;
+            }
+        }
+
+        if (empty($mimetype)) {
+            $mimetype = $default;
+        }
+
+        parent::__construct($mimetype);
+    }
+
+    /**
+     * Throws an error of the given type
+     *
+     * Error keys of Zend_Validate_File_MimeType are translated into the keys
+     * of this class before the error is registered.
+     * Duplicates parent method due to OOP Problem with late static binding in PHP 5.2
+     *
+     * @param  array  $file      File data array; its 'name' entry is reported as the value
+     * @param  string $errorType
+     * @return false
+     */
+    protected function _throw($file, $errorType)
+    {
+        $this->_value = $file['name'];
+
+        if ($errorType == Zend_Validate_File_MimeType::FALSE_TYPE) {
+            $errorType = self::FALSE_TYPE;
+        } elseif ($errorType == Zend_Validate_File_MimeType::NOT_DETECTED) {
+            $errorType = self::NOT_DETECTED;
+        } elseif ($errorType == Zend_Validate_File_MimeType::NOT_READABLE) {
+            $errorType = self::NOT_READABLE;
+        }
+
+        $this->_error($errorType);
+        return false;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/File/NotExists.php

@@ -0,0 +1,84 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: NotExists.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_File_Exists
+ */
+require_once 'Zend/Validate/File/Exists.php';
+
+/**
+ * Validator which checks if the destination file does not exist
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_File_NotExists extends Zend_Validate_File_Exists
+{
+    /**
+     * @const string Error constants
+     */
+    const DOES_EXIST = 'fileNotExistsDoesExist';
+
+    /**
+     * @var array Error message templates
+     */
+    protected $_messageTemplates = array(
+        self::DOES_EXIST => "File '%value%' exists",
+    );
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if the file does not exist in the set destinations.
+     * When $file carries a non-empty 'destination' it is checked in addition to
+     * the configured directories. When $file carries no 'name', $value is used
+     * as the file name. If no non-empty directory is available to check, the
+     * validation fails with DOES_EXIST.
+     *
+     * @param  string  $value Real file to check for
+     * @param  array   $file  File data from Zend_File_Transfer
+     * @return boolean
+     */
+    public function isValid($value, $file = null)
+    {
+        $directories = $this->getDirectory(true);
+        if (($file !== null) && !empty($file['destination'])) {
+            $directories[] = $file['destination'];
+        }
+
+        // Fall back to $value whenever no name was supplied. This used to be
+        // skipped when a destination was given, which made the check below
+        // test the bare directory and always report an existing file.
+        if (!isset($file['name'])) {
+            $file['name'] = $value;
+        }
+
+        $checked = false;
+        foreach ($directories as $directory) {
+            if (empty($directory)) {
+                continue;
+            }
+
+            $checked = true;
+            if (file_exists($directory . DIRECTORY_SEPARATOR . $file['name'])) {
+                return $this->_throw($file, self::DOES_EXIST);
+            }
+        }
+
+        // Nothing could be checked: treat as failure
+        if (!$checked) {
+            return $this->_throw($file, self::DOES_EXIST);
+        }
+
+        return true;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/File/ExcludeMimeType.php

@@ -0,0 +1,109 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: ExcludeMimeType.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_File_MimeType
+ */
+require_once 'Zend/Validate/File/MimeType.php';
+
+/**
+ * Validator which rejects files whose mime type matches one of the given mime types
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_File_ExcludeMimeType extends Zend_Validate_File_MimeType
+{
+    /**
+     * @const string Error constants
+     */
+    const FALSE_TYPE   = 'fileExcludeMimeTypeFalse';
+    const NOT_DETECTED = 'fileExcludeMimeTypeNotDetected';
+    const NOT_READABLE = 'fileExcludeMimeTypeNotReadable';
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if the mimetype of the file does not match the given ones. Also parts
+     * of mimetypes can be checked. If you give for example "image" all image
+     * mime types will not be accepted like "image/gif", "image/jpeg" and so on.
+     *
+     * The type is detected with finfo when that class is loaded, then with
+     * mime_content_type() when available and configured, and finally taken
+     * from $file['type'] when the header check is enabled.
+     *
+     * @param  string $value Real file to check for mimetype
+     * @param  array  $file  File data from Zend_File_Transfer
+     * @return boolean
+     */
+    public function isValid($value, $file = null)
+    {
+        if ($file === null) {
+            $file = array(
+                'type' => null,
+                'name' => $value
+            );
+        }
+
+        // Is file readable ?
+        require_once 'Zend/Loader.php';
+        if (!Zend_Loader::isReadable($value)) {
+            return $this->_throw($file, self::NOT_READABLE);
+        }
+
+        // Forget the type detected by a previous call. Otherwise, when finfo
+        // is not available, the fallbacks below are skipped and this file
+        // would be judged by the type of the previously validated file.
+        $this->_type = null;
+
+        $mimefile = $this->getMagicFile();
+        if (class_exists('finfo', false)) {
+            // FILEINFO_MIME_TYPE is preferred; FILEINFO_MIME is the fallback
+            $const = defined('FILEINFO_MIME_TYPE') ? FILEINFO_MIME_TYPE : FILEINFO_MIME;
+            if (!empty($mimefile)) {
+                $mime = new finfo($const, $mimefile);
+            } else {
+                $mime = new finfo($const);
+            }
+
+            if (!empty($mime)) {
+                $this->_type = $mime->file($value);
+            }
+            unset($mime);
+        }
+
+        if (empty($this->_type)) {
+            if (function_exists('mime_content_type') && ini_get('mime_magic.magicfile')) {
+                $this->_type = mime_content_type($value);
+            } elseif ($this->_headerCheck) {
+                $this->_type = $file['type'];
+            }
+        }
+
+        if (empty($this->_type)) {
+            return $this->_throw($file, self::NOT_DETECTED);
+        }
+
+        $mimetype = $this->getMimeType(true);
+        if (in_array($this->_type, $mimetype)) {
+            return $this->_throw($file, self::FALSE_TYPE);
+        }
+
+        // Also compare against the parts of the detected type. Splitting on
+        // ';' mirrors Zend_Validate_File_MimeType::isValid(): with
+        // FILEINFO_MIME the type may carry a "; charset=..." suffix, which
+        // would otherwise let an excluded type pass.
+        $types = explode('/', $this->_type);
+        $types = array_merge($types, explode('-', $this->_type));
+        $types = array_merge($types, explode(';', $this->_type));
+        foreach($mimetype as $mime) {
+            if (in_array($mime, $types)) {
+                return $this->_throw($file, self::FALSE_TYPE);
+            }
+        }
+
+        return true;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/File/Size.php

@@ -0,0 +1,404 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: Size.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * Validator for the maximum size of a file up to a max of 2GB
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_File_Size extends Zend_Validate_Abstract
+{
+    /**#@+
+     * @const string Error constants
+     */
+    const TOO_BIG   = 'fileSizeTooBig';
+    const TOO_SMALL = 'fileSizeTooSmall';
+    const NOT_FOUND = 'fileSizeNotFound';
+    /**#@-*/
+
+    /**
+     * @var array Error message templates
+     */
+    protected $_messageTemplates = array(
+        self::TOO_BIG   => "Maximum allowed size for file '%value%' is '%max%' but '%size%' detected",
+        self::TOO_SMALL => "Minimum expected size for file '%value%' is '%min%' but '%size%' detected",
+        self::NOT_FOUND => "File '%value%' is not readable or does not exist",
+    );
+
+    /**
+     * @var array Error message template variables
+     */
+    protected $_messageVariables = array(
+        'min'  => '_min',
+        'max'  => '_max',
+        'size' => '_size',
+    );
+
+    /**
+     * Minimum filesize
+     * @var integer
+     */
+    protected $_min;
+
+    /**
+     * Maximum filesize
+     *
+     * If null, there is no maximum filesize
+     *
+     * @var integer|null
+     */
+    protected $_max;
+
+    /**
+     * Detected size
+     *
+     * @var integer
+     */
+    protected $_size;
+
+    /**
+     * Use bytestring ?
+     *
+     * @var boolean
+     */
+    protected $_useByteString = true;
+
+    /**
+     * Sets validator options
+     *
+     * If $options is a string or a number, it will be used as maximum filesize
+     * As array it accepts the following keys:
+     * 'min': Minimum filesize
+     * 'max': Maximum filesize
+     * 'bytestring': Use bytestring or real size for messages
+     *
+     * When further positional arguments are passed, the second one overrides
+     * 'max' and the third one sets 'bytestring'.
+     *
+     * @param  integer|string|array|Zend_Config $options Options for the adapter
+     * @throws Zend_Validate_Exception When $options is of an unsupported type
+     */
+    public function __construct($options)
+    {
+        if ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        } elseif (is_string($options) || is_numeric($options)) {
+            $options = array('max' => $options);
+        } elseif (!is_array($options)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception ('Invalid options to validator provided');
+        }
+
+        if (1 < func_num_args()) {
+            $argv = func_get_args();
+            array_shift($argv);
+            $options['max'] = array_shift($argv);
+            if (!empty($argv)) {
+                $options['bytestring'] = array_shift($argv);
+            }
+        }
+
+        if (isset($options['bytestring'])) {
+            $this->setUseByteString($options['bytestring']);
+        }
+
+        if (isset($options['min'])) {
+            $this->setMin($options['min']);
+        }
+
+        if (isset($options['max'])) {
+            $this->setMax($options['max']);
+        }
+    }
+
+    /**
+     * Sets whether sizes are rendered as byte strings (e.g. "2kB")
+     *
+     * @param  boolean $byteString Use bytestring ?
+     * @return Zend_Validate_File_Size Provides a fluent interface
+     */
+    public function setUseByteString($byteString = true)
+    {
+        $this->_useByteString = (bool) $byteString;
+        return $this;
+    }
+
+    /**
+     * Will bytestring be used?
+     *
+     * @return boolean
+     */
+    public function useByteString()
+    {
+        return $this->_useByteString;
+    }
+
+    /**
+     * Returns the minimum filesize
+     *
+     * @param  bool $raw Whether or not to force return of the raw value (defaults off)
+     * @return integer|string
+     */
+    public function getMin($raw = false)
+    {
+        $min = $this->_min;
+        if (!$raw && $this->useByteString()) {
+            $min = $this->_toByteString($min);
+        }
+
+        return $min;
+    }
+
+    /**
+     * Sets the minimum filesize
+     *
+     * @param  integer|string $min The minimum filesize, as number or byte string
+     * @throws Zend_Validate_Exception When min is greater than max
+     * @return Zend_Validate_File_Size Provides a fluent interface
+     */
+    public function setMin($min)
+    {
+        if (!is_string($min) and !is_numeric($min)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception ('Invalid options to validator provided');
+        }
+
+        $min = (integer) $this->_fromByteString($min);
+        $max = $this->getMax(true);
+        if (($max !== null) && ($min > $max)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception("The minimum must be less than or equal to the maximum filesize, but $min >"
+                                            . " $max");
+        }
+
+        $this->_min = $min;
+        return $this;
+    }
+
+    /**
+     * Returns the maximum filesize
+     *
+     * @param  bool $raw Whether or not to force return of the raw value (defaults off)
+     * @return integer|string
+     */
+    public function getMax($raw = false)
+    {
+        $max = $this->_max;
+        if (!$raw && $this->useByteString()) {
+            $max = $this->_toByteString($max);
+        }
+
+        return $max;
+    }
+
+    /**
+     * Sets the maximum filesize
+     *
+     * @param  integer|string $max The maximum filesize, as number or byte string
+     * @throws Zend_Validate_Exception When max is smaller than min
+     * @return Zend_Validate_File_Size Provides a fluent interface
+     */
+    public function setMax($max)
+    {
+        if (!is_string($max) && !is_numeric($max)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception ('Invalid options to validator provided');
+        }
+
+        $max = (integer) $this->_fromByteString($max);
+        $min = $this->getMin(true);
+        if (($min !== null) && ($max < $min)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception("The maximum must be greater than or equal to the minimum filesize, but "
+                                            . "$max < $min");
+        }
+
+        $this->_max = $max;
+        return $this;
+    }
+
+    /**
+     * Retrieve current detected file size
+     *
+     * @return int
+     */
+    protected function _getSize()
+    {
+        return $this->_size;
+    }
+
+    /**
+     * Set current size
+     *
+     * @param  int $size
+     * @return Zend_Validate_File_Size
+     */
+    protected function _setSize($size)
+    {
+        $this->_size = $size;
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if the filesize of $value is at least min and
+     * not bigger than max (when max is not null).
+     *
+     * @param  string $value Real file to check for size
+     * @param  array  $file  File data from Zend_File_Transfer
+     * @return boolean
+     */
+    public function isValid($value, $file = null)
+    {
+        // Is file readable ?
+        require_once 'Zend/Loader.php';
+        if (!Zend_Loader::isReadable($value)) {
+            return $this->_throw($file, self::NOT_FOUND);
+        }
+
+        // limited to 4GB files
+        // The size is formatted as an unsigned decimal string
+        $size        = sprintf("%u", @filesize($value));
+        $this->_size = $size;
+
+        // Check to see if it's smaller than min size
+        $min = $this->getMin(true);
+        $max = $this->getMax(true);
+        if (($min !== null) && ($size < $min)) {
+            if ($this->useByteString()) {
+                // Show byte strings while the message is built, then restore
+                // the raw values
+                $this->_min  = $this->_toByteString($min);
+                $this->_size = $this->_toByteString($size);
+                $this->_throw($file, self::TOO_SMALL);
+                $this->_min  = $min;
+                $this->_size = $size;
+            } else {
+                $this->_throw($file, self::TOO_SMALL);
+            }
+        }
+
+        // Check to see if it's larger than max size
+        if (($max !== null) && ($max < $size)) {
+            if ($this->useByteString()) {
+                $this->_max  = $this->_toByteString($max);
+                $this->_size = $this->_toByteString($size);
+                $this->_throw($file, self::TOO_BIG);
+                $this->_max  = $max;
+                $this->_size = $size;
+            } else {
+                $this->_throw($file, self::TOO_BIG);
+            }
+        }
+
+        if (count($this->_messages) > 0) {
+            return false;
+        }
+
+        return true;
+    }
+
+    /**
+     * Returns the formatted size
+     *
+     * The size is divided by 1024 until it is below 1024 or the largest
+     * known unit is reached, then rounded to two decimals.
+     *
+     * @param  integer $size
+     * @return string
+     */
+    protected function _toByteString($size)
+    {
+        $sizes = array('B', 'kB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB');
+
+        // Stop at the last unit: the former bound of 9 allowed $i to run one
+        // past the end of $sizes for values of 1024^9 and above.
+        $last = count($sizes) - 1;
+        for ($i=0; $size >= 1024 && $i < $last; $i++) {
+            $size /= 1024;
+        }
+
+        return round($size, 2) . $sizes[$i];
+    }
+
+    /**
+     * Returns the unformatted size
+     *
+     * The unit is read from the second to last character, so two letter
+     * units such as "kB" or "MB" are recognised.
+     * NOTE(review): a single letter suffix such as "10K" is not recognised
+     * as a unit by this lookup - confirm whether that is intended.
+     *
+     * @param  string $size
+     * @return integer|string|float Numeric input is returned as integer
+     */
+    protected function _fromByteString($size)
+    {
+        if (is_numeric($size)) {
+            return (integer) $size;
+        }
+
+        $type  = trim(substr($size, -2, 1));
+
+        // Strip one trailing character, and a second one if the rest is
+        // still not numeric
+        $value = substr($size, 0, -1);
+        if (!is_numeric($value)) {
+            $value = substr($value, 0, -1);
+        }
+
+        switch (strtoupper($type)) {
+            case 'Y':
+                $value *= (1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024);
+                break;
+            case 'Z':
+                $value *= (1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024);
+                break;
+            case 'E':
+                $value *= (1024 * 1024 * 1024 * 1024 * 1024 * 1024);
+                break;
+            case 'P':
+                $value *= (1024 * 1024 * 1024 * 1024 * 1024);
+                break;
+            case 'T':
+                $value *= (1024 * 1024 * 1024 * 1024);
+                break;
+            case 'G':
+                $value *= (1024 * 1024 * 1024);
+                break;
+            case 'M':
+                $value *= (1024 * 1024);
+                break;
+            case 'K':
+                $value *= 1024;
+                break;
+            default:
+                break;
+        }
+
+        return $value;
+    }
+
+    /**
+     * Throws an error of the given type
+     *
+     * @param  array|null $file      File data array; when given, its 'name' entry is reported as the value
+     * @param  string     $errorType
+     * @return false
+     */
+    protected function _throw($file, $errorType)
+    {
+        if ($file !== null) {
+            $this->_value = $file['name'];
+        }
+
+        $this->_error($errorType);
+        return false;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/File/Crc32.php

@@ -0,0 +1,179 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: Crc32.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_File_Hash
+ */
+require_once 'Zend/Validate/File/Hash.php';
+
+/**
+ * Validator for the crc32 hash of given files
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_File_Crc32 extends Zend_Validate_File_Hash
+{
+    /**
+     * @const string Error constants
+     */
+    const DOES_NOT_MATCH = 'fileCrc32DoesNotMatch';
+    const NOT_DETECTED   = 'fileCrc32NotDetected';
+    const NOT_FOUND      = 'fileCrc32NotFound';
+
+    /**
+     * @var array Error message templates
+     */
+    protected $_messageTemplates = array(
+        self::DOES_NOT_MATCH => "File '%value%' does not match the given crc32 hashes",
+        self::NOT_DETECTED   => "A crc32 hash could not be evaluated for the given file",
+        self::NOT_FOUND      => "File '%value%' is not readable or does not exist",
+    );
+
+    /**
+     * Accepted hashes
+     *
+     * isValid() treats the KEYS of this array as the accepted hash values.
+     * This class never writes the property itself; presumably it is filled
+     * by the parent's setHash()/addHash() - confirm in Zend_Validate_File_Hash.
+     *
+     * @var array|null
+     */
+    protected $_hash;
+
+    /**
+     * Sets validator options
+     *
+     * A Zend_Config is converted to an array, a scalar is wrapped as
+     * array('hash1' => $options); the result is passed to setCrc32().
+     *
+     * @param  string|array|Zend_Config $options
+     * @throws Zend_Validate_Exception When $options is neither a Zend_Config, a scalar nor an array
+     * @return void
+     */
+    public function __construct($options)
+    {
+        if ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        } elseif (is_scalar($options)) {
+            $options = array('hash1' => $options);
+        } elseif (!is_array($options)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Invalid options to validator provided');
+        }
+
+        $this->setCrc32($options);
+    }
+
+    /**
+     * Returns all set crc32 hashes
+     *
+     * Thin alias for the inherited getHash().
+     *
+     * @return array
+     */
+    public function getCrc32()
+    {
+        return $this->getHash();
+    }
+
+    /**
+     * Sets the crc32 hash for one or multiple files
+     *
+     * Forces the 'algorithm' option to 'crc32' before delegating to the parent.
+     *
+     * @param  string|array $options
+     * @return Zend_Validate_File_Crc32 Provides a fluent interface
+     */
+    public function setHash($options)
+    {
+        if (!is_array($options)) {
+            $options = array($options);
+        }
+
+        $options['algorithm'] = 'crc32';
+        parent::setHash($options);
+        return $this;
+    }
+
+    /**
+     * Sets the crc32 hash for one or multiple files
+     *
+     * Alias for setHash().
+     *
+     * @param  string|array $options
+     * @return Zend_Validate_File_Crc32 Provides a fluent interface
+     */
+    public function setCrc32($options)
+    {
+        $this->setHash($options);
+        return $this;
+    }
+
+    /**
+     * Adds the crc32 hash for one or multiple files
+     *
+     * Forces the 'algorithm' option to 'crc32' before delegating to the parent.
+     *
+     * @param  string|array $options
+     * @return Zend_Validate_File_Crc32 Provides a fluent interface
+     */
+    public function addHash($options)
+    {
+        if (!is_array($options)) {
+            $options = array($options);
+        }
+
+        $options['algorithm'] = 'crc32';
+        parent::addHash($options);
+        return $this;
+    }
+
+    /**
+     * Adds the crc32 hash for one or multiple files
+     *
+     * Alias for addHash().
+     *
+     * @param  string|array $options
+     * @return Zend_Validate_File_Crc32 Provides a fluent interface
+     */
+    public function addCrc32($options)
+    {
+        $this->addHash($options);
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if the crc32 hash of the given file equals
+     * one of the set hashes
+     *
+     * @param  string $value Filename to check for hash
+     * @param  array  $file  File data from Zend_File_Transfer
+     * @return boolean
+     */
+    public function isValid($value, $file = null)
+    {
+        // Is file readable ?
+        require_once 'Zend/Loader.php';
+        if (!Zend_Loader::isReadable($value)) {
+            return $this->_throw($file, self::NOT_FOUND);
+        }
+
+        $filehash = hash_file('crc32', $value);
+        if ($filehash === false) {
+            return $this->_throw($file, self::NOT_DETECTED);
+        }
+
+        // The (array) cast keeps array_keys() from being handed null when no
+        // hash has been stored.
+        foreach (array_keys((array) $this->_hash) as $hash) {
+            // PHP converts array keys that look like decimal integers
+            // (e.g. the hash "12345678") to int, so the key has to be cast
+            // back to a string before the strict comparison - otherwise such
+            // a hash could never match.
+            if ($filehash === (string) $hash) {
+                return true;
+            }
+        }
+
+        return $this->_throw($file, self::DOES_NOT_MATCH);
+    }
+}
\ No newline at end of file

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/File/Extension.php

@@ -0,0 +1,232 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: Extension.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * Validator for the file extension of a file
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_File_Extension extends Zend_Validate_Abstract
+{
+    /**
+     * @const string Error constants
+     */
+    const FALSE_EXTENSION = 'fileExtensionFalse';
+    const NOT_FOUND       = 'fileExtensionNotFound';
+
+    /**
+     * @var array Error message templates
+     */
+    protected $_messageTemplates = array(
+        self::FALSE_EXTENSION => "File '%value%' has a false extension",
+        self::NOT_FOUND       => "File '%value%' is not readable or does not exist",
+    );
+
+    /**
+     * Internal list of extensions, kept as a comma separated string
+     * @var string
+     */
+    protected $_extension = '';
+
+    /**
+     * Validate case sensitive
+     *
+     * @var boolean
+     */
+    protected $_case = false;
+
+    /**
+     * @var array Error message template variables
+     */
+    protected $_messageVariables = array(
+        'extension' => '_extension'
+    );
+
+    /**
+     * Sets validator options
+     *
+     * The case option may be given either as a second constructor argument
+     * or as the 'case' key of an options array; the array key is applied
+     * last and is removed before the rest is used as the extension list.
+     *
+     * @param  string|array|Zend_Config $options
+     * @return void
+     */
+    public function __construct($options)
+    {
+        if ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        }
+
+        if (1 < func_num_args()) {
+            $case = func_get_arg(1);
+            $this->setCase($case);
+        }
+
+        if (is_array($options) and isset($options['case'])) {
+            $this->setCase($options['case']);
+            unset($options['case']);
+        }
+
+        $this->setExtension($options);
+    }
+
+    /**
+     * Returns the case option
+     *
+     * @return boolean
+     */
+    public function getCase()
+    {
+        return $this->_case;
+    }
+
+    /**
+     * Sets the case to use
+     *
+     * @param  boolean $case
+     * @return Zend_Validate_File_Extension Provides a fluent interface
+     */
+    public function setCase($case)
+    {
+        $this->_case = (boolean) $case;
+        return $this;
+    }
+
+    /**
+     * Returns the set file extension
+     *
+     * Note that an empty list is returned as array('') because the internal
+     * string is simply split on commas.
+     *
+     * @return array
+     */
+    public function getExtension()
+    {
+        // setExtension() resets the property to null; cast so that explode()
+        // always receives a string.
+        $extension = explode(',', (string) $this->_extension);
+
+        return $extension;
+    }
+
+    /**
+     * Sets the file extensions
+     *
+     * Discards the current list and then adds the given extensions.
+     *
+     * @param  string|array $extension The extensions to validate
+     * @return Zend_Validate_File_Extension Provides a fluent interface
+     */
+    public function setExtension($extension)
+    {
+        $this->_extension = null;
+        $this->addExtension($extension);
+        return $this;
+    }
+
+    /**
+     * Adds the file extensions
+     *
+     * A string is split on commas. Entries that are empty or not strings
+     * are ignored, the rest is trimmed and de-duplicated.
+     *
+     * @param  string|array $extension The extensions to add for validation
+     * @return Zend_Validate_File_Extension Provides a fluent interface
+     */
+    public function addExtension($extension)
+    {
+        $extensions = $this->getExtension();
+        if (is_string($extension)) {
+            $extension = explode(',', $extension);
+        }
+
+        foreach ($extension as $content) {
+            if (empty($content) || !is_string($content)) {
+                continue;
+            }
+
+            $extensions[] = trim($content);
+        }
+        $extensions = array_unique($extensions);
+
+        // Sanity check to ensure no empty values
+        // (catches the '' from an empty list and entries that were only whitespace)
+        foreach ($extensions as $key => $ext) {
+            if (empty($ext)) {
+                unset($extensions[$key]);
+            }
+        }
+
+        $this->_extension = implode(',', $extensions);
+
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if the fileextension of $value is included in the
+     * set extension list
+     *
+     * When $file is given, the extension is taken from $file['name'],
+     * otherwise from $value. A name without any extension is rejected with
+     * FALSE_EXTENSION.
+     *
+     * @param  string  $value Real file to check for extension
+     * @param  array   $file  File data from Zend_File_Transfer
+     * @return boolean
+     */
+    public function isValid($value, $file = null)
+    {
+        // Is file readable ?
+        require_once 'Zend/Loader.php';
+        if (!Zend_Loader::isReadable($value)) {
+            return $this->_throw($file, self::NOT_FOUND);
+        }
+
+        if ($file !== null) {
+            // strrpos() returns false when the name contains no dot; without
+            // this check "false + 1" would turn everything after the first
+            // character into the "extension".
+            $dot   = strrpos($file['name'], '.');
+            $found = ($dot === false) ? null : (string) substr($file['name'], $dot + 1);
+        } else {
+            // pathinfo() omits the 'extension' element when the path has none
+            $info  = pathinfo($value);
+            $found = isset($info['extension']) ? $info['extension'] : null;
+        }
+
+        if ($found === null) {
+            return $this->_throw($file, self::FALSE_EXTENSION);
+        }
+
+        $extensions = $this->getExtension();
+
+        if ($this->getCase()) {
+            // Strict comparison: numeric looking extensions such as "001"
+            // and "1" must not be treated as equal.
+            if (in_array($found, $extensions, true)) {
+                return true;
+            }
+        } else {
+            $needle = strtolower($found);
+            foreach ($extensions as $extension) {
+                if (strtolower($extension) === $needle) {
+                    return true;
+                }
+            }
+        }
+
+        return $this->_throw($file, self::FALSE_EXTENSION);
+    }
+
+    /**
+     * Throws an error of the given type
+     *
+     * @param  array|null $file      File data; its 'name' entry becomes the reported value
+     * @param  string     $errorType
+     * @return false
+     */
+    protected function _throw($file, $errorType)
+    {
+        if (null !== $file) {
+            $this->_value = $file['name'];
+        }
+
+        $this->_error($errorType);
+        return false;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/File/ImageSize.php

@@ -0,0 +1,364 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: ImageSize.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * Validator for the image size of a image file
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_File_ImageSize extends Zend_Validate_Abstract
+{
+    /**
+     * @const string Error constants
+     */
+    const WIDTH_TOO_BIG    = 'fileImageSizeWidthTooBig';
+    const WIDTH_TOO_SMALL  = 'fileImageSizeWidthTooSmall';
+    const HEIGHT_TOO_BIG   = 'fileImageSizeHeightTooBig';
+    const HEIGHT_TOO_SMALL = 'fileImageSizeHeightTooSmall';
+    const NOT_DETECTED     = 'fileImageSizeNotDetected';
+    const NOT_READABLE     = 'fileImageSizeNotReadable';
+
+    /**
+     * @var array Error message template
+     */
+    protected $_messageTemplates = array(
+        self::WIDTH_TOO_BIG    => "Maximum allowed width for image '%value%' should be '%maxwidth%' but '%width%' detected",
+        self::WIDTH_TOO_SMALL  => "Minimum expected width for image '%value%' should be '%minwidth%' but '%width%' detected",
+        self::HEIGHT_TOO_BIG   => "Maximum allowed height for image '%value%' should be '%maxheight%' but '%height%' detected",
+        self::HEIGHT_TOO_SMALL => "Minimum expected height for image '%value%' should be '%minheight%' but '%height%' detected",
+        self::NOT_DETECTED     => "The size of image '%value%' could not be detected",
+        self::NOT_READABLE     => "File '%value%' is not readable or does not exist",
+    );
+
+    /**
+     * @var array Error message template variables
+     */
+    protected $_messageVariables = array(
+        'minwidth'  => '_minwidth',
+        'maxwidth'  => '_maxwidth',
+        'minheight' => '_minheight',
+        'maxheight' => '_maxheight',
+        'width'     => '_width',
+        'height'    => '_height'
+    );
+
+    /**
+     * Minimum image width
+     *
+     * @var integer
+     */
+    protected $_minwidth;
+
+    /**
+     * Maximum image width
+     *
+     * @var integer
+     */
+    protected $_maxwidth;
+
+    /**
+     * Minimum image height
+     *
+     * @var integer
+     */
+    protected $_minheight;
+
+    /**
+     * Maximum image height
+     *
+     * @var integer
+     */
+    protected $_maxheight;
+
+    /**
+     * Detected width
+     *
+     * @var integer
+     */
+    protected $_width;
+
+    /**
+     * Detected height
+     *
+     * @var integer
+     */
+    protected $_height;
+
+    /**
+     * Sets validator options
+     *
+     * Accepts the following option keys:
+     * - minheight
+     * - minwidth
+     * - maxheight
+     * - maxwidth
+     *
+     * Alternatively the limits may be passed as positional arguments in the
+     * order minwidth, minheight[, maxwidth[, maxheight]].
+     *
+     * @param  Zend_Config|array $options
+     * @throws Zend_Validate_Exception When a single argument is neither a Zend_Config nor an array
+     * @return void
+     */
+    public function __construct($options)
+    {
+        if ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        } elseif (1 < func_num_args()) {
+            // Positional form: the first argument is minwidth unless it
+            // already is an options array.
+            if (!is_array($options)) {
+                $options = array('minwidth' => $options);
+            }
+            $argv = func_get_args();
+            array_shift($argv);
+            $options['minheight'] = array_shift($argv);
+            if (!empty($argv)) {
+                $options['maxwidth'] = array_shift($argv);
+                if (!empty($argv)) {
+                    $options['maxheight'] = array_shift($argv);
+                }
+            }
+        } else if (!is_array($options)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception ('Invalid options to validator provided');
+        }
+
+        if (isset($options['minheight']) || isset($options['minwidth'])) {
+            $this->setImageMin($options);
+        }
+
+        if (isset($options['maxheight']) || isset($options['maxwidth'])) {
+            $this->setImageMax($options);
+        }
+    }
+
+    /**
+     * Returns the set minimum image sizes
+     *
+     * @return array Keys 'minwidth' and 'minheight'
+     */
+    public function getImageMin()
+    {
+        return array('minwidth' => $this->_minwidth, 'minheight' => $this->_minheight);
+    }
+
+    /**
+     * Returns the set maximum image sizes
+     *
+     * @return array Keys 'maxwidth' and 'maxheight'
+     */
+    public function getImageMax()
+    {
+        return array('maxwidth' => $this->_maxwidth, 'maxheight' => $this->_maxheight);
+    }
+
+    /**
+     * Returns the set image width sizes
+     *
+     * @return array Keys 'minwidth' and 'maxwidth'
+     */
+    public function getImageWidth()
+    {
+        return array('minwidth' => $this->_minwidth, 'maxwidth' => $this->_maxwidth);
+    }
+
+    /**
+     * Returns the set image height sizes
+     *
+     * @return array Keys 'minheight' and 'maxheight'
+     */
+    public function getImageHeight()
+    {
+        return array('minheight' => $this->_minheight, 'maxheight' => $this->_maxheight);
+    }
+
+    /**
+     * Sets the minimum image size
+     *
+     * Both limits are checked against the currently set maxima before
+     * anything is stored, so a failing call leaves the validator unchanged.
+     *
+     * @param  array $options               The minimum image dimensions
+     * @throws Zend_Validate_Exception      When minwidth is greater than maxwidth
+     * @throws Zend_Validate_Exception      When minheight is greater than maxheight
+     * @return Zend_Validate_File_ImageSize Provides a fluent interface
+     */
+    public function setImageMin($options)
+    {
+        if (isset($options['minwidth'])) {
+            if (($this->_maxwidth !== null) and ($options['minwidth'] > $this->_maxwidth)) {
+                require_once 'Zend/Validate/Exception.php';
+                throw new Zend_Validate_Exception("The minimum image width must be less than or equal to the "
+                    . " maximum image width, but {$options['minwidth']} > {$this->_maxwidth}");
+            }
+        }
+
+        // The guard has to look at 'minheight', the key that is compared
+        // below; testing 'maxheight' skipped the check for a lone minheight
+        // and read an undefined index for a lone maxheight.
+        if (isset($options['minheight'])) {
+            if (($this->_maxheight !== null) and ($options['minheight'] > $this->_maxheight)) {
+                require_once 'Zend/Validate/Exception.php';
+                throw new Zend_Validate_Exception("The minimum image height must be less than or equal to the "
+                    . " maximum image height, but {$options['minheight']} > {$this->_maxheight}");
+            }
+        }
+
+        if (isset($options['minwidth'])) {
+            $this->_minwidth  = (int) $options['minwidth'];
+        }
+
+        if (isset($options['minheight'])) {
+            $this->_minheight = (int) $options['minheight'];
+        }
+
+        return $this;
+    }
+
+    /**
+     * Sets the maximum image size
+     *
+     * Both limits are checked against the currently set minima before
+     * anything is stored, so a failing call leaves the validator unchanged.
+     *
+     * @param  array $options               The maximum image dimensions
+     * @throws Zend_Validate_Exception      When maxwidth is smaller than minwidth
+     * @throws Zend_Validate_Exception      When maxheight is smaller than minheight
+     * @return Zend_Validate_File_ImageSize Provides a fluent interface
+     */
+    public function setImageMax($options)
+    {
+        if (isset($options['maxwidth'])) {
+            if (($this->_minwidth !== null) and ($options['maxwidth'] < $this->_minwidth)) {
+                require_once 'Zend/Validate/Exception.php';
+                throw new Zend_Validate_Exception("The maximum image width must be greater than or equal to the "
+                    . "minimum image width, but {$options['maxwidth']} < {$this->_minwidth}");
+            }
+        }
+
+        if (isset($options['maxheight'])) {
+            if (($this->_minheight !== null) and ($options['maxheight'] < $this->_minheight)) {
+                require_once 'Zend/Validate/Exception.php';
+                // Report the minimum HEIGHT here (the message used to print the minimum width)
+                throw new Zend_Validate_Exception("The maximum image height must be greater than or equal to the "
+                    . "minimum image height, but {$options['maxheight']} < {$this->_minheight}");
+            }
+        }
+
+        if (isset($options['maxwidth'])) {
+            $this->_maxwidth  = (int) $options['maxwidth'];
+        }
+
+        if (isset($options['maxheight'])) {
+            $this->_maxheight = (int) $options['maxheight'];
+        }
+
+        return $this;
+    }
+
+    /**
+     * Sets the mimimum and maximum image width
+     *
+     * Delegates to setImageMin() and setImageMax() with the full options
+     * array, so any height keys present in $options are applied as well.
+     *
+     * @param  array $options               The image width dimensions
+     * @return Zend_Validate_File_ImageSize Provides a fluent interface
+     */
+    public function setImageWidth($options)
+    {
+        $this->setImageMin($options);
+        $this->setImageMax($options);
+
+        return $this;
+    }
+
+    /**
+     * Sets the mimimum and maximum image height
+     *
+     * Delegates to setImageMin() and setImageMax() with the full options
+     * array, so any width keys present in $options are applied as well.
+     *
+     * @param  array $options               The image height dimensions
+     * @return Zend_Validate_File_ImageSize Provides a fluent interface
+     */
+    public function setImageHeight($options)
+    {
+        $this->setImageMin($options);
+        $this->setImageMax($options);
+
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if the imagesize of $value is at least min and
+     * not bigger than max
+     *
+     * All four limits are checked, so several errors may be reported for
+     * one image.
+     *
+     * @param  string $value Real file to check for image size
+     * @param  array  $file  File data from Zend_File_Transfer
+     * @return boolean
+     */
+    public function isValid($value, $file = null)
+    {
+        // Is file readable ?
+        require_once 'Zend/Loader.php';
+        if (!Zend_Loader::isReadable($value)) {
+            return $this->_throw($file, self::NOT_READABLE);
+        }
+
+        $size = @getimagesize($value);
+        // NOTE(review): _setValue() lives in the parent class; presumably it
+        // also clears messages of an earlier run, which the count() check at
+        // the end of this method depends on - confirm in Zend_Validate_Abstract.
+        $this->_setValue($file);
+
+        if (empty($size) or ($size[0] === 0) or ($size[1] === 0)) {
+            return $this->_throw($file, self::NOT_DETECTED);
+        }
+
+        $this->_width  = $size[0];
+        $this->_height = $size[1];
+        if ($this->_width < $this->_minwidth) {
+            $this->_throw($file, self::WIDTH_TOO_SMALL);
+        }
+
+        if (($this->_maxwidth !== null) and ($this->_maxwidth < $this->_width)) {
+            $this->_throw($file, self::WIDTH_TOO_BIG);
+        }
+
+        if ($this->_height < $this->_minheight) {
+            $this->_throw($file, self::HEIGHT_TOO_SMALL);
+        }
+
+        if (($this->_maxheight !== null) and ($this->_maxheight < $this->_height)) {
+            $this->_throw($file, self::HEIGHT_TOO_BIG);
+        }
+
+        if (count($this->_messages) > 0) {
+            return false;
+        }
+
+        return true;
+    }
+
+    /**
+     * Throws an error of the given type
+     *
+     * @param  array|null $file      File data; its 'name' entry becomes the reported value
+     * @param  string     $errorType
+     * @return false
+     */
+    protected function _throw($file, $errorType)
+    {
+        if ($file !== null) {
+            $this->_value = $file['name'];
+        }
+
+        $this->_error($errorType);
+        return false;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/File/IsImage.php

@@ -0,0 +1,173 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: IsImage.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_File_MimeType
+ */
+require_once 'Zend/Validate/File/MimeType.php';
+
+/**
+ * Validator which checks if the file already exists in the directory
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_File_IsImage extends Zend_Validate_File_MimeType
+{
+    /**
+     * @const string Error constants
+     */
+    const FALSE_TYPE   = 'fileIsImageFalseType';
+    const NOT_DETECTED = 'fileIsImageNotDetected';
+    const NOT_READABLE = 'fileIsImageNotReadable';
+
+    /**
+     * @var array Error message templates
+     */
+    protected $_messageTemplates = array(
+        self::FALSE_TYPE   => "File '%value%' is no image, '%type%' detected",
+        self::NOT_DETECTED => "The mimetype of file '%value%' could not be detected",
+        self::NOT_READABLE => "File '%value%' is not readable or does not exist",
+    );
+
+    /**
+     * Sets validator options
+     *
+     * When no mimetype is given (an empty value, or an array that holds
+     * nothing but the 'magicfile' / 'headerCheck' options) the built-in
+     * list of image mimetypes is used.
+     *
+     * @param  string|array|Zend_Config $mimetype
+     * @return void
+     */
+    public function __construct($mimetype = array())
+    {
+        if ($mimetype instanceof Zend_Config) {
+            $mimetype = $mimetype->toArray();
+        }
+
+        // http://de.wikipedia.org/wiki/Liste_von_Dateiendungen
+        // http://www.iana.org/assignments/media-types/image/
+        $default = array(
+            'application/cdf',
+            'application/dicom',
+            'application/fractals',
+            'application/postscript',
+            'application/vnd.hp-hpgl',
+            'application/vnd.oasis.opendocument.graphics',
+            'application/x-cdf',
+            'application/x-cmu-raster',
+            'application/x-ima',
+            'application/x-inventor',
+            'application/x-koan',
+            'application/x-portable-anymap',
+            'application/x-world-x-3dmf',
+            'image/bmp',
+            'image/c',
+            'image/cgm',
+            'image/fif',
+            'image/gif',
+            'image/jpeg',
+            'image/jpm',
+            'image/jpx',
+            'image/jp2',
+            'image/naplps',
+            'image/pjpeg',
+            'image/png',
+            'image/svg',
+            'image/svg+xml',
+            'image/tiff',
+            'image/vnd.adobe.photoshop',
+            'image/vnd.djvu',
+            'image/vnd.fpx',
+            'image/vnd.net-fpx',
+            'image/x-cmu-raster',
+            'image/x-cmx',
+            'image/x-coreldraw',
+            'image/x-cpi',
+            'image/x-emf',
+            'image/x-ico',
+            'image/x-icon',
+            'image/x-jg',
+            'image/x-ms-bmp',
+            'image/x-niff',
+            'image/x-pict',
+            'image/x-pcx',
+            'image/x-portable-anymap',
+            'image/x-portable-bitmap',
+            'image/x-portable-greymap',
+            'image/x-portable-pixmap',
+            'image/x-quicktime',
+            'image/x-rgb',
+            'image/x-tiff',
+            'image/x-unknown',
+            'image/x-windows-bmp',
+            'image/x-xpmi',
+        );
+
+        if (is_array($mimetype)) {
+            // Look at a copy without the non-mimetype options to find out
+            // whether the caller supplied any mimetype at all.
+            $temp = $mimetype;
+            unset($temp['magicfile'], $temp['headerCheck']);
+
+            if (empty($temp)) {
+                // Array union keeps the caller's option keys and appends the defaults
+                $mimetype += $default;
+            }
+        }
+
+        if (empty($mimetype)) {
+            $mimetype = $default;
+        }
+
+        parent::__construct($mimetype);
+    }
+
+    /**
+     * Throws an error of the given type
+     * Duplicates parent method due to OOP Problem with late static binding in PHP 5.2
+     *
+     * Error types of Zend_Validate_File_MimeType are translated to the
+     * matching constants of this class; any other type is passed through.
+     *
+     * @param  array|null $file      File data; its 'name' entry becomes the reported value
+     * @param  string     $errorType
+     * @return false
+     */
+    protected function _throw($file, $errorType)
+    {
+        // Same result as reading $file['name'] directly (null when it is
+        // missing), but without a notice when $file is null or has no name.
+        $this->_value = isset($file['name']) ? $file['name'] : null;
+        switch($errorType) {
+            case Zend_Validate_File_MimeType::FALSE_TYPE :
+                $errorType = self::FALSE_TYPE;
+                break;
+            case Zend_Validate_File_MimeType::NOT_DETECTED :
+                $errorType = self::NOT_DETECTED;
+                break;
+            case Zend_Validate_File_MimeType::NOT_READABLE :
+                $errorType = self::NOT_READABLE;
+                break;
+        }
+
+        $this->_error($errorType);
+        return false;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/File/Exists.php

@@ -0,0 +1,203 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: Exists.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * Validator which checks if the file already exists in the directory
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_File_Exists extends Zend_Validate_Abstract
+{
+    /**
+     * @const string Error constants
+     */
+    const DOES_NOT_EXIST = 'fileExistsDoesNotExist';
+
+    /**
+     * @var array Error message templates
+     */
+    protected $_messageTemplates = array(
+        self::DOES_NOT_EXIST => "File '%value%' does not exist",
+    );
+
+    /**
+     * Internal list of directories, kept as a comma separated string
+     * @var string
+     */
+    protected $_directory = '';
+
+    /**
+     * @var array Error message template variables
+     */
+    protected $_messageVariables = array(
+        'directory' => '_directory'
+    );
+
+    /**
+     * Sets validator options
+     *
+     * @param  string|array|Zend_Config $directory A comma separated string or a list of directories
+     * @throws Zend_Validate_Exception When $directory is neither a Zend_Config, a string nor an array
+     * @return void
+     */
+    public function __construct($directory = array())
+    {
+        if ($directory instanceof Zend_Config) {
+            $directory = $directory->toArray();
+        } else if (is_string($directory)) {
+            $directory = explode(',', $directory);
+        } else if (!is_array($directory)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception ('Invalid options to validator provided');
+        }
+
+        $this->setDirectory($directory);
+    }
+
+    /**
+     * Returns the set file directories which are checked
+     *
+     * Note that an empty list is returned as array('') in array mode
+     * because the internal string is simply split on commas.
+     *
+     * @param  boolean $asArray Returns the values as array, when false a concatenated string is returned
+     * @return string|array
+     */
+    public function getDirectory($asArray = false)
+    {
+        $asArray   = (bool) $asArray;
+        $directory = (string) $this->_directory;
+        if ($asArray) {
+            $directory = explode(',', $directory);
+        }
+
+        return $directory;
+    }
+
+    /**
+     * Sets the file directory which will be checked
+     *
+     * Discards the current list and then adds the given directories.
+     *
+     * @param  string|array $directory The directories to validate
+     * @return Zend_Validate_File_Exists Provides a fluent interface
+     */
+    public function setDirectory($directory)
+    {
+        $this->_directory = null;
+        $this->addDirectory($directory);
+        return $this;
+    }
+
+    /**
+     * Adds the file directory which will be checked
+     *
+     * A string is split on commas. Entries that are empty or not strings
+     * are ignored, the rest is trimmed and de-duplicated.
+     *
+     * @param  string|array $directory The directory to add for validation
+     * @throws Zend_Validate_Exception When $directory is neither a string nor an array
+     * @return Zend_Validate_File_Exists Provides a fluent interface
+     */
+    public function addDirectory($directory)
+    {
+        $directories = $this->getDirectory(true);
+
+        if (is_string($directory)) {
+            $directory = explode(',', $directory);
+        } else if (!is_array($directory)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception ('Invalid options to validator provided');
+        }
+
+        foreach ($directory as $content) {
+            if (empty($content) || !is_string($content)) {
+                continue;
+            }
+
+            $directories[] = trim($content);
+        }
+        $directories = array_unique($directories);
+
+        // Sanity check to ensure no empty values
+        // (catches the '' from an empty list and entries that were only whitespace)
+        foreach ($directories as $key => $dir) {
+            if (empty($dir)) {
+                unset($directories[$key]);
+            }
+        }
+
+        $this->_directory = implode(',', $directories);
+
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if the file already exists in the set directories
+     *
+     * The file has to exist in every checked directory; validation fails at
+     * the first directory that lacks it, and also when there is no directory
+     * to check at all. A non-empty $file['destination'] is checked in
+     * addition to the configured directories.
+     *
+     * @param  string  $value Real file to check for existence
+     * @param  array   $file  File data from Zend_File_Transfer
+     * @return boolean
+     */
+    public function isValid($value, $file = null)
+    {
+        $directories = $this->getDirectory(true);
+        if (($file !== null) and (!empty($file['destination']))) {
+            $directories[] = $file['destination'];
+        }
+
+        // Fall back to $value whenever no name is available. This must not
+        // depend on the destination check above: with a destination but no
+        // name, the lookup below would otherwise test the bare directory.
+        if (!isset($file['name'])) {
+            $file['name'] = $value;
+        }
+
+        $check = false;
+        foreach ($directories as $directory) {
+            if (empty($directory)) {
+                continue;
+            }
+
+            $check = true;
+            if (!file_exists($directory . DIRECTORY_SEPARATOR . $file['name'])) {
+                return $this->_throw($file, self::DOES_NOT_EXIST);
+            }
+        }
+
+        // No usable directory at all
+        if (!$check) {
+            return $this->_throw($file, self::DOES_NOT_EXIST);
+        }
+
+        return true;
+    }
+
+    /**
+     * Throws an error of the given type
+     *
+     * @param  array|null $file      File data; its 'name' entry becomes the reported value
+     * @param  string     $errorType
+     * @return false
+     */
+    protected function _throw($file, $errorType)
+    {
+        if ($file !== null) {
+            $this->_value = $file['name'];
+        }
+
+        $this->_error($errorType);
+        return false;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/File/Hash.php

@@ -0,0 +1,194 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: Hash.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * Validator for the hash of given files
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_File_Hash extends Zend_Validate_Abstract
+{
+    /**
+     * @const string Error constants
+     */
+    const DOES_NOT_MATCH = 'fileHashDoesNotMatch';
+    const NOT_DETECTED   = 'fileHashHashNotDetected';
+    const NOT_FOUND      = 'fileHashNotFound';
+
+    /**
+     * @var array Error message templates
+     */
+    protected $_messageTemplates = array(
+        self::DOES_NOT_MATCH => "File '%value%' does not match the given hashes",
+        self::NOT_DETECTED   => "A hash could not be evaluated for the given file",
+        self::NOT_FOUND      => "File '%value%' is not readable or does not exist"
+    );
+
+    /**
+     * Accepted hashes: the hash is the key, the algorithm name the value.
+     * Stays null until the first hash has been added.
+     *
+     * @var array|null
+     */
+    protected $_hash;
+
+    /**
+     * Sets validator options
+     *
+     * A scalar is treated as a single accepted hash. When a second
+     * constructor argument is passed it is used as the algorithm.
+     *
+     * @param  string|array|Zend_Config $options
+     * @throws Zend_Validate_Exception When $options has an unsupported type
+     */
+    public function __construct($options)
+    {
+        if ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        } elseif (is_scalar($options)) {
+            $options = array('hash1' => $options);
+        } elseif (!is_array($options)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Invalid options to validator provided');
+        }
+
+        if (1 < func_num_args()) {
+            $options['algorithm'] = func_get_arg(1);
+        }
+
+        $this->setHash($options);
+    }
+
+    /**
+     * Returns the set hash values as array, the hash as key and the algorithm the value
+     *
+     * @return array|null Null when no hash has been added yet
+     */
+    public function getHash()
+    {
+        return $this->_hash;
+    }
+
+    /**
+     * Replaces all accepted hashes with the given ones
+     *
+     * @param  string|array $options
+     * @return Zend_Validate_File_Hash Provides a fluent interface
+     */
+    public function setHash($options)
+    {
+        $this->_hash  = null;
+        $this->addHash($options);
+
+        return $this;
+    }
+
+    /**
+     * Adds the hash for one or multiple files
+     *
+     * An optional 'algorithm' key selects the algorithm for every hash in
+     * this call; without it 'crc32' is used.
+     *
+     * @param  string|array $options
+     * @return Zend_Validate_File_Hash Provides a fluent interface
+     * @throws Zend_Validate_Exception On a wrong parameter type or an unknown algorithm
+     */
+    public function addHash($options)
+    {
+        if (is_string($options)) {
+            $options = array($options);
+        } else if (!is_array($options)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception("False parameter given");
+        }
+
+        $known = hash_algos();
+        if (!isset($options['algorithm'])) {
+            $algorithm = 'crc32';
+        } else {
+            $algorithm = $options['algorithm'];
+            unset($options['algorithm']);
+        }
+
+        // Strict lookup: a loose in_array() lets non-string values such as 0
+        // pass as a "known" algorithm on older PHP versions.
+        if (!in_array($algorithm, $known, true)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception("Unknown algorithm '{$algorithm}'");
+        }
+
+        foreach ($options as $value) {
+            $this->_hash[$value] = $algorithm;
+        }
+
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if the given file confirms the set hash
+     *
+     * @param  string $value Filename to check for hash
+     * @param  array  $file  File data from Zend_File_Transfer
+     * @return boolean
+     */
+    public function isValid($value, $file = null)
+    {
+        // Is file readable ?
+        require_once 'Zend/Loader.php';
+        if (!Zend_Loader::isReadable($value)) {
+            return $this->_throw($file, self::NOT_FOUND);
+        }
+
+        // $_hash is null when nothing has been configured; treat that as an
+        // empty set instead of feeding null into the array functions.
+        $configured = (array) $this->_hash;
+        $algos      = array_unique(array_values($configured));
+
+        // PHP stores all-digit keys such as '12345678' as integers, so the
+        // keys are cast back to strings before the strict comparison against
+        // the string returned by hash_file().
+        $hashes = array_map('strval', array_keys($configured));
+
+        foreach ($algos as $algorithm) {
+            $filehash = hash_file($algorithm, $value);
+            if ($filehash === false) {
+                return $this->_throw($file, self::NOT_DETECTED);
+            }
+
+            if (in_array($filehash, $hashes, true)) {
+                return true;
+            }
+        }
+
+        return $this->_throw($file, self::DOES_NOT_MATCH);
+    }
+
+    /**
+     * Throws an error of the given type
+     *
+     * @param  array|null $file      File data; its 'name' becomes the reported value
+     * @param  string     $errorType
+     * @return false
+     */
+    protected function _throw($file, $errorType)
+    {
+        if ($file !== null) {
+            $this->_value = $file['name'];
+        }
+
+        $this->_error($errorType);
+        return false;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/File/WordCount.php

@@ -0,0 +1,101 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: WordCount.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_File_Count
+ */
+require_once 'Zend/Validate/File/Count.php';
+
+/**
+ * Validator for counting all words in a file
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_File_WordCount extends Zend_Validate_File_Count
+{
+    /**#@+
+     * @const string Error constants
+     */
+    const TOO_MUCH  = 'fileWordCountTooMuch';
+    const TOO_LESS  = 'fileWordCountTooLess';
+    const NOT_FOUND = 'fileWordCountNotFound';
+    /**#@-*/
+
+    /**
+     * @var array Error message templates
+     */
+    protected $_messageTemplates = array(
+        self::TOO_MUCH => "Too much words, maximum '%max%' are allowed but '%count%' were counted",
+        self::TOO_LESS => "Too less words, minimum '%min%' are expected but '%count%' were counted",
+        self::NOT_FOUND => "File '%value%' is not readable or does not exist",
+    );
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if the counted words are at least min and
+     * not bigger than max (when max is not null).
+     *
+     * @param  string $value Filename to check for word count
+     * @param  array  $file  File data from Zend_File_Transfer
+     * @return boolean
+     */
+    public function isValid($value, $file = null)
+    {
+        // Is file readable ?
+        require_once 'Zend/Loader.php';
+        if (!Zend_Loader::isReadable($value)) {
+            return $this->_throw($file, self::NOT_FOUND);
+        }
+
+        // The read can still fail after the readability check; report that as
+        // NOT_FOUND instead of silently counting zero words.
+        $content = file_get_contents($value);
+        if ($content === false) {
+            return $this->_throw($file, self::NOT_FOUND);
+        }
+
+        $this->_count = str_word_count($content);
+        if (($this->_max !== null) && ($this->_count > $this->_max)) {
+            return $this->_throw($file, self::TOO_MUCH);
+        }
+
+        if (($this->_min !== null) && ($this->_count < $this->_min)) {
+            return $this->_throw($file, self::TOO_LESS);
+        }
+
+        return true;
+    }
+
+    /**
+     * Throws an error of the given type
+     *
+     * @param  array|null $file      File data; its 'name' becomes the reported value
+     * @param  string     $errorType
+     * @return false
+     */
+    protected function _throw($file, $errorType)
+    {
+        if ($file !== null) {
+            $this->_value = $file['name'];
+        }
+
+        $this->_error($errorType);
+        return false;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/File/Upload.php

@@ -0,0 +1,251 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: Upload.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * Validator for files uploaded via HTTP POST, reporting PHP upload errors
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_File_Upload extends Zend_Validate_Abstract
+{
+    /**#@+
+     * @const string Error constants
+     */
+    const INI_SIZE       = 'fileUploadErrorIniSize';
+    const FORM_SIZE      = 'fileUploadErrorFormSize';
+    const PARTIAL        = 'fileUploadErrorPartial';
+    const NO_FILE        = 'fileUploadErrorNoFile';
+    const NO_TMP_DIR     = 'fileUploadErrorNoTmpDir';
+    const CANT_WRITE     = 'fileUploadErrorCantWrite';
+    const EXTENSION      = 'fileUploadErrorExtension';
+    const ATTACK         = 'fileUploadErrorAttack';
+    const FILE_NOT_FOUND = 'fileUploadErrorFileNotFound';
+    const UNKNOWN        = 'fileUploadErrorUnknown';
+    /**#@-*/
+
+    /**
+     * @var array Error message templates
+     */
+    protected $_messageTemplates = array(
+        self::INI_SIZE       => "File '%value%' exceeds the defined ini size",
+        self::FORM_SIZE      => "File '%value%' exceeds the defined form size",
+        self::PARTIAL        => "File '%value%' was only partially uploaded",
+        self::NO_FILE        => "File '%value%' was not uploaded",
+        self::NO_TMP_DIR     => "No temporary directory was found for file '%value%'",
+        self::CANT_WRITE     => "File '%value%' can't be written",
+        self::EXTENSION      => "A PHP extension returned an error while uploading the file '%value%'",
+        self::ATTACK         => "File '%value%' was illegally uploaded. This could be a possible attack",
+        self::FILE_NOT_FOUND => "File '%value%' was not found",
+        self::UNKNOWN        => "Unknown error while uploading file '%value%'"
+    );
+
+    /**
+     * Internal array of files
+     * @var array
+     */
+    protected $_files = array();
+
+    /**
+     * Sets validator options
+     *
+     * The array $files must be given in syntax of Zend_File_Transfer to be checked
+     * If no files are given the $_FILES array will be used automatically.
+     * NOTE: This validator will only work with HTTP POST uploads!
+     *
+     * @param  array|Zend_Config $files Array of files in syntax of Zend_File_Transfer
+     */
+    public function __construct($files = array())
+    {
+        if ($files instanceof Zend_Config) {
+            $files = $files->toArray();
+        }
+
+        $this->setFiles($files);
+    }
+
+    /**
+     * Returns the array of set files
+     *
+     * @param  string $file (Optional) Form key or file name of the file to return in detail
+     * @return array
+     * @throws Zend_Validate_Exception If file is not found
+     */
+    public function getFiles($file = null)
+    {
+        if ($file !== null) {
+            $return = array();
+            foreach ($this->_files as $name => $content) {
+                // Match either the key of the entry or its 'name' field; the
+                // isset() guard avoids a notice on entries without a name.
+                $keyMatches  = ($name === $file);
+                $nameMatches = isset($content['name']) && ($content['name'] === $file);
+                if ($keyMatches || $nameMatches) {
+                    $return[$name] = $content;
+                }
+            }
+
+            if (count($return) === 0) {
+                require_once 'Zend/Validate/Exception.php';
+                throw new Zend_Validate_Exception("The file '$file' was not found");
+            }
+
+            return $return;
+        }
+
+        return $this->_files;
+    }
+
+    /**
+     * Sets the files to be checked
+     *
+     * Entries without an 'error' field are dropped.
+     *
+     * @param  array $files The files to check in syntax of Zend_File_Transfer
+     * @return Zend_Validate_File_Upload Provides a fluent interface
+     */
+    public function setFiles($files = array())
+    {
+        // empty() instead of count(): count() on a null argument raises a
+        // warning (PHP 7.2+) or a TypeError (PHP 8).
+        if (empty($files)) {
+            $this->_files = $_FILES;
+        } else {
+            $this->_files = $files;
+        }
+
+        // see ZF-10738
+        if (is_null($this->_files)) {
+            $this->_files = array();
+        }
+
+        foreach($this->_files as $file => $content) {
+            if (!isset($content['error'])) {
+                unset($this->_files[$file]);
+            }
+        }
+
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if the file was uploaded without errors
+     *
+     * @param  string     $value Key, name or tmp_name of the file to check for upload errors
+     * @param  array|null $file  File data; only used for the reported value when no file matches
+     * @return boolean
+     */
+    public function isValid($value, $file = null)
+    {
+        $this->_messages = null;
+
+        // Collect the entries addressed by $value. A separate loop variable
+        // keeps the $file parameter intact for the FILE_NOT_FOUND report.
+        $files = array();
+        if (array_key_exists($value, $this->_files)) {
+            $files[$value] = $this->_files[$value];
+        } else {
+            foreach ($this->_files as $key => $content) {
+                $nameMatches = isset($content['name']) && ($content['name'] === $value);
+                $tmpMatches  = isset($content['tmp_name']) && ($content['tmp_name'] === $value);
+                if ($nameMatches || $tmpMatches) {
+                    $files[$key] = $content;
+                }
+            }
+        }
+
+        if (empty($files)) {
+            return $this->_throw($file, self::FILE_NOT_FOUND);
+        }
+
+        foreach ($files as $key => $content) {
+            $this->_value = $key;
+            switch($content['error']) {
+                case UPLOAD_ERR_OK:
+                    if (!is_uploaded_file($content['tmp_name'])) {
+                        $this->_throw($key, self::ATTACK);
+                    }
+                    break;
+
+                case UPLOAD_ERR_INI_SIZE:
+                    $this->_throw($key, self::INI_SIZE);
+                    break;
+
+                case UPLOAD_ERR_FORM_SIZE:
+                    $this->_throw($key, self::FORM_SIZE);
+                    break;
+
+                case UPLOAD_ERR_PARTIAL:
+                    $this->_throw($key, self::PARTIAL);
+                    break;
+
+                case UPLOAD_ERR_NO_FILE:
+                    $this->_throw($key, self::NO_FILE);
+                    break;
+
+                case UPLOAD_ERR_NO_TMP_DIR:
+                    $this->_throw($key, self::NO_TMP_DIR);
+                    break;
+
+                case UPLOAD_ERR_CANT_WRITE:
+                    $this->_throw($key, self::CANT_WRITE);
+                    break;
+
+                case UPLOAD_ERR_EXTENSION:
+                    $this->_throw($key, self::EXTENSION);
+                    break;
+
+                default:
+                    $this->_throw($key, self::UNKNOWN);
+                    break;
+            }
+        }
+
+        // $_messages is still null when nothing failed; empty() covers both
+        // null and an empty array, where count(null) would warn or throw.
+        return empty($this->_messages);
+    }
+
+    /**
+     * Throws an error of the given type
+     *
+     * @param  array|string|null $file      File data; a non-empty 'name' becomes the reported value
+     * @param  string            $errorType
+     * @return false
+     */
+    protected function _throw($file, $errorType)
+    {
+        if ($file !== null) {
+            if (is_array($file) && !empty($file['name'])) {
+                $this->_value = $file['name'];
+            }
+        }
+
+        $this->_error($errorType);
+        return false;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/File/Md5.php

@@ -0,0 +1,183 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: Md5.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_File_Hash
+ */
+require_once 'Zend/Validate/File/Hash.php';
+
+/**
+ * Validator for the md5 hash of given files
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_File_Md5 extends Zend_Validate_File_Hash
+{
+    /**
+     * @const string Error constants
+     */
+    const DOES_NOT_MATCH = 'fileMd5DoesNotMatch';
+    const NOT_DETECTED   = 'fileMd5NotDetected';
+    const NOT_FOUND      = 'fileMd5NotFound';
+
+    /**
+     * @var array Error message templates
+     */
+    protected $_messageTemplates = array(
+        self::DOES_NOT_MATCH => "File '%value%' does not match the given md5 hashes",
+        self::NOT_DETECTED   => "A md5 hash could not be evaluated for the given file",
+        self::NOT_FOUND      => "File '%value%' is not readable or does not exist",
+    );
+
+    /**
+     * Accepted hashes: the hash is the key, the algorithm name the value.
+     * Stays null until the first hash has been added.
+     *
+     * @var array|null
+     */
+    protected $_hash;
+
+    /**
+     * Sets validator options
+     *
+     * A scalar is treated as a single accepted md5 hash.
+     *
+     * @param  string|array|Zend_Config $options
+     * @throws Zend_Validate_Exception When $options has an unsupported type
+     */
+    public function __construct($options)
+    {
+        if ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        } elseif (is_scalar($options)) {
+            $options = array('hash1' => $options);
+        } elseif (!is_array($options)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Invalid options to validator provided');
+        }
+
+        $this->setMd5($options);
+    }
+
+    /**
+     * Returns all set md5 hashes
+     *
+     * @return array|null Null when no hash has been added yet
+     */
+    public function getMd5()
+    {
+        return $this->getHash();
+    }
+
+    /**
+     * Sets the md5 hash for one or multiple files
+     *
+     * Any 'algorithm' entry in $options is overwritten with 'md5'.
+     *
+     * @param  string|array $options
+     * @return Zend_Validate_File_Hash Provides a fluent interface
+     */
+    public function setHash($options)
+    {
+        if (!is_array($options)) {
+            $options = (array) $options;
+        }
+
+        $options['algorithm'] = 'md5';
+        parent::setHash($options);
+        return $this;
+    }
+
+    /**
+     * Sets the md5 hash for one or multiple files
+     *
+     * @param  string|array $options
+     * @return Zend_Validate_File_Hash Provides a fluent interface
+     */
+    public function setMd5($options)
+    {
+        $this->setHash($options);
+        return $this;
+    }
+
+    /**
+     * Adds the md5 hash for one or multiple files
+     *
+     * Any 'algorithm' entry in $options is overwritten with 'md5'.
+     *
+     * @param  string|array $options
+     * @return Zend_Validate_File_Hash Provides a fluent interface
+     */
+    public function addHash($options)
+    {
+        if (!is_array($options)) {
+            $options = (array) $options;
+        }
+
+        $options['algorithm'] = 'md5';
+        parent::addHash($options);
+        return $this;
+    }
+
+    /**
+     * Adds the md5 hash for one or multiple files
+     *
+     * @param  string|array $options
+     * @return Zend_Validate_File_Hash Provides a fluent interface
+     */
+    public function addMd5($options)
+    {
+        $this->addHash($options);
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if the given file confirms the set hash
+     *
+     * @param  string $value Filename to check for hash
+     * @param  array  $file  File data from Zend_File_Transfer
+     * @return boolean
+     */
+    public function isValid($value, $file = null)
+    {
+        // Is file readable ?
+        require_once 'Zend/Loader.php';
+        if (!Zend_Loader::isReadable($value)) {
+            return $this->_throw($file, self::NOT_FOUND);
+        }
+
+        $filehash = hash_file('md5', $value);
+        if ($filehash === false) {
+            return $this->_throw($file, self::NOT_DETECTED);
+        }
+
+        // $_hash is null when no hash was configured; the (array) cast turns
+        // that into an empty set instead of passing null to array_keys().
+        // Keys are cast to string because PHP stores integer-like keys as int.
+        foreach ((array) $this->_hash as $hash => $algorithm) {
+            if ($filehash === (string) $hash) {
+                return true;
+            }
+        }
+
+        return $this->_throw($file, self::DOES_NOT_MATCH);
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/File/ExcludeExtension.php

@@ -0,0 +1,94 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: ExcludeExtension.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_File_Extension
+ */
+require_once 'Zend/Validate/File/Extension.php';
+
+/**
+ * Validator for excluding file extensions
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_File_ExcludeExtension extends Zend_Validate_File_Extension
+{
+    /**
+     * @const string Error constants
+     */
+    const FALSE_EXTENSION = 'fileExcludeExtensionFalse';
+    const NOT_FOUND       = 'fileExcludeExtensionNotFound';
+
+    /**
+     * @var array Error message templates
+     */
+    protected $_messageTemplates = array(
+        self::FALSE_EXTENSION => "File '%value%' has a false extension",
+        self::NOT_FOUND       => "File '%value%' is not readable or does not exist",
+    );
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if the fileextension of $value is not included in the
+     * set extension list
+     *
+     * @param  string  $value Real file to check for extension
+     * @param  array   $file  File data from Zend_File_Transfer
+     * @return boolean
+     */
+    public function isValid($value, $file = null)
+    {
+        // Is file readable ?
+        require_once 'Zend/Loader.php';
+        if (!Zend_Loader::isReadable($value)) {
+            return $this->_throw($file, self::NOT_FOUND);
+        }
+
+        // Take the extension from the transferred file name when file data is
+        // given, otherwise from the path itself. PATHINFO_EXTENSION yields ''
+        // for names without a dot; the former strrpos()/substr() arithmetic
+        // turned "README" into the extension "EADME", and pathinfo() without
+        // the flag has no 'extension' key for such names.
+        $name   = ($file !== null) ? $file['name'] : $value;
+        $actual = (string) pathinfo($name, PATHINFO_EXTENSION);
+        if (!$this->_case) {
+            $actual = strtolower($actual);
+        }
+
+        // Compare as strings, strictly, so numeric-looking extensions such as
+        // '1e1' and '10' are not considered equal.
+        foreach ($this->getExtension() as $extension) {
+            $candidate = (string) $extension;
+            if (!$this->_case) {
+                $candidate = strtolower($candidate);
+            }
+
+            if ($candidate === $actual) {
+                return $this->_throw($file, self::FALSE_EXTENSION);
+            }
+        }
+
+        return true;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/File/Count.php

@@ -0,0 +1,284 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ * @version   $Id: Count.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * Validator for counting all given files
+ *
+ * @category  Zend
+ * @package   Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license   http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_File_Count extends Zend_Validate_Abstract
+{
+    /**#@+
+     * @const string Error constants
+     */
+    const TOO_MANY = 'fileCountTooMany';
+    const TOO_FEW  = 'fileCountTooFew';
+    /**#@-*/
+
+    /**
+     * @var array Error message templates
+     */
+    protected $_messageTemplates = array(
+        self::TOO_MANY => "Too many files, maximum '%max%' are allowed but '%count%' are given",
+        self::TOO_FEW  => "Too few files, minimum '%min%' are expected but '%count%' are given",
+    );
+
+    /**
+     * @var array Error message template variables
+     */
+    protected $_messageVariables = array(
+        'min'   => '_min',
+        'max'   => '_max',
+        'count' => '_count'
+    );
+
+    /**
+     * Minimum file count
+     *
+     * If null, there is no minimum file count
+     *
+     * @var integer|null
+     */
+    protected $_min;
+
+    /**
+     * Maximum file count
+     *
+     * If null, there is no maximum file count
+     *
+     * @var integer|null
+     */
+    protected $_max;
+
+    /**
+     * Actual filecount
+     *
+     * @var integer
+     */
+    protected $_count;
+
+    /**
+     * Internal file array, keyed by file name.
+     * Starts empty so that count() always receives an array.
+     *
+     * @var array
+     */
+    protected $_files = array();
+
+    /**
+     * Sets validator options
+     *
+     * If $options is a string or number, it is used as maximum file count.
+     * As array it accepts the following keys:
+     * 'min': Minimum filecount
+     * 'max': Maximum filecount
+     *
+     * When a second argument is passed, the first argument is the minimum
+     * and the second the maximum file count.
+     *
+     * @param  integer|array|Zend_Config $options Options for the adapter
+     * @throws Zend_Validate_Exception When the options have an unsupported type
+     */
+    public function __construct($options)
+    {
+        // Keep the caller's first argument before $options is normalised.
+        // Since PHP 7, func_get_arg(0) returns the current (already
+        // reassigned) value, which made the two-argument form always throw.
+        $firstArgument = $options;
+
+        if ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        } elseif (is_string($options) || is_numeric($options)) {
+            $options = array('max' => $options);
+        } elseif (!is_array($options)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception ('Invalid options to validator provided');
+        }
+
+        if (1 < func_num_args()) {
+            $options['min'] = $firstArgument;
+            $options['max'] = func_get_arg(1);
+        }
+
+        if (isset($options['min'])) {
+            $this->setMin($options);
+        }
+
+        if (isset($options['max'])) {
+            $this->setMax($options);
+        }
+    }
+
+    /**
+     * Returns the minimum file count
+     *
+     * @return integer|null
+     */
+    public function getMin()
+    {
+        return $this->_min;
+    }
+
+    /**
+     * Sets the minimum file count
+     *
+     * @param  integer|array $min The minimum file count, or an array with a 'min' key
+     * @return Zend_Validate_File_Count Provides a fluent interface
+     * @throws Zend_Validate_Exception When min is greater than max
+     */
+    public function setMin($min)
+    {
+        if (is_array($min) && isset($min['min'])) {
+            $min = $min['min'];
+        }
+
+        if (!is_string($min) && !is_numeric($min)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception ('Invalid options to validator provided');
+        }
+
+        $min = (integer) $min;
+        if (($this->_max !== null) && ($min > $this->_max)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception("The minimum must be less than or equal to the maximum file count, but $min >"
+                                            . " {$this->_max}");
+        }
+
+        $this->_min = $min;
+        return $this;
+    }
+
+    /**
+     * Returns the maximum file count
+     *
+     * @return integer|null
+     */
+    public function getMax()
+    {
+        return $this->_max;
+    }
+
+    /**
+     * Sets the maximum file count
+     *
+     * @param  integer|array $max The maximum file count, or an array with a 'max' key
+     * @return Zend_Validate_File_Count Provides a fluent interface
+     * @throws Zend_Validate_Exception When max is smaller than min
+     */
+    public function setMax($max)
+    {
+        if (is_array($max) && isset($max['max'])) {
+            $max = $max['max'];
+        }
+
+        if (!is_string($max) && !is_numeric($max)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception ('Invalid options to validator provided');
+        }
+
+        $max = (integer) $max;
+        if (($this->_min !== null) && ($max < $this->_min)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception("The maximum must be greater than or equal to the minimum file count, but "
+                                            . "$max < {$this->_min}");
+        }
+
+        $this->_max = $max;
+        return $this;
+    }
+
+    /**
+     * Adds a file for validation
+     *
+     * Empty names and names that were already added are ignored.
+     *
+     * @param  string|array $file
+     * @return Zend_Validate_File_Count Provides a fluent interface
+     */
+    public function addFile($file)
+    {
+        if (is_string($file)) {
+            $file = array($file);
+        }
+
+        if (is_array($file)) {
+            foreach ($file as $name) {
+                if (!isset($this->_files[$name]) && !empty($name)) {
+                    $this->_files[$name] = $name;
+                }
+            }
+        }
+
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if the file count of all checked files is at least min and
+     * not bigger than max (when max is not null). Attention: When checking with set min you
+     * must give all files with the first call, otherwise you will get an false.
+     *
+     * @param  string|array $value Filenames to check for count
+     * @param  array        $file  File data from Zend_File_Transfer
+     * @return boolean
+     */
+    public function isValid($value, $file = null)
+    {
+        if (($file !== null) && !array_key_exists('destination', $file)) {
+            $file['destination'] = dirname($value);
+        }
+
+        if (($file !== null) && array_key_exists('tmp_name', $file)) {
+            $value = $file['destination'] . DIRECTORY_SEPARATOR . $file['name'];
+        }
+
+        // Only register the file when no transfer data was given or when the
+        // transfer data carries a non-empty tmp_name.
+        if (($file === null) || !empty($file['tmp_name'])) {
+            $this->addFile($value);
+        }
+
+        $this->_count = count($this->_files);
+        if (($this->_max !== null) && ($this->_count > $this->_max)) {
+            return $this->_throw($file, self::TOO_MANY);
+        }
+
+        if (($this->_min !== null) && ($this->_count < $this->_min)) {
+            return $this->_throw($file, self::TOO_FEW);
+        }
+
+        return true;
+    }
+
+    /**
+     * Throws an error of the given type
+     *
+     * @param  array|null $file      File data; its 'name' becomes the reported value
+     * @param  string     $errorType
+     * @return false
+     */
+    protected function _throw($file, $errorType)
+    {
+        if ($file !== null) {
+            $this->_value = $file['name'];
+        }
+
+        $this->_error($errorType);
+        return false;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Identical.php

@@ -0,0 +1,164 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Identical.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/** @see Zend_Validate_Abstract */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Identical extends Zend_Validate_Abstract
+{
+    /**
+     * Error codes
+     * @const string
+     */
+    const NOT_SAME      = 'notSame';
+    const MISSING_TOKEN = 'missingToken';
+
+    /**
+     * Error message templates, keyed by error code
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::NOT_SAME      => "The two given tokens do not match",
+        self::MISSING_TOKEN => 'No token was provided to match against',
+    );
+
+    /**
+     * Additional message variables: exposes $_tokenString under the name 'token'
+     * @var array
+     */
+    protected $_messageVariables = array(
+        'token' => '_tokenString'
+    );
+
+    /**
+     * String representation of the token (set by setToken())
+     * @var string
+     */
+    protected $_tokenString;
+
+    /**
+     * Original token against which to validate
+     * @var mixed
+     */
+    protected $_token;
+
+    /**
+     * Whether the comparison is strict (!==) or loose (!=)
+     * @var boolean
+     */
+    protected $_strict = true;
+
+    /**
+     * Sets validator options
+     *
+     * Accepts either the token itself, or an array / Zend_Config with a
+     * 'token' key and an optional 'strict' key.
+     *
+     * @param  mixed $token
+     * @return void
+     */
+    public function __construct($token = null)
+    {
+        if ($token instanceof Zend_Config) {
+            $token = $token->toArray();
+        }
+
+        if (is_array($token) && array_key_exists('token', $token)) {
+            if (array_key_exists('strict', $token)) {
+                $this->setStrict($token['strict']);
+            }
+
+            $this->setToken($token['token']);
+        } else if (null !== $token) {
+            $this->setToken($token);
+        }
+    }
+
+    /**
+     * Retrieve token
+     *
+     * @return mixed The token exactly as it was passed to setToken()
+     */
+    public function getToken()
+    {
+        return $this->_token;
+    }
+
+    /**
+     * Set token against which to compare
+     *
+     * @param  mixed $token
+     * @return Zend_Validate_Identical Provides a fluent interface
+     */
+    public function setToken($token)
+    {
+        $this->_tokenString = (string) $token;
+        $this->_token       = $token;
+        return $this;
+    }
+
+    /**
+     * Returns the strict parameter
+     *
+     * @return boolean
+     */
+    public function getStrict()
+    {
+        return $this->_strict;
+    }
+
+    /**
+     * Sets the strict parameter
+     *
+     * @param  boolean $strict True for a type-safe (!==) comparison, false for a loose (!=) one
+     * @return Zend_Validate_Identical Provides a fluent interface
+     */
+    public function setStrict($strict)
+    {
+        $this->_strict = (boolean) $strict;
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if a token has been set and the provided value
+     * matches that token.
+     *
+     * When $context is an array that contains the configured token as a key,
+     * the value stored under that key is used as the token to compare against.
+     *
+     * @param  mixed $value
+     * @param  array $context
+     * @return boolean
+     */
+    public function isValid($value, $context = null)
+    {
+        $this->_setValue((string) $value);
+
+        $token = $this->getToken();
+
+        // Only look the token up in the context when that is well defined:
+        // the context must be an array and the token a valid array key.
+        // Anything else (string context, array/object token, ...) would make
+        // array_key_exists() emit a warning or throw a TypeError.
+        if (is_array($context)
+            && (is_string($token) || is_int($token))
+            && array_key_exists($token, $context)
+        ) {
+            $token = $context[$token];
+        }
+
+        if ($token === null) {
+            $this->_error(self::MISSING_TOKEN);
+            return false;
+        }
+
+        $strict = $this->getStrict();
+        if (($strict && ($value !== $token)) || (!$strict && ($value != $token))) {
+            $this->_error(self::NOT_SAME);
+            return false;
+        }
+
+        return true;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Callback.php

@@ -0,0 +1,174 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Callback.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Callback extends Zend_Validate_Abstract
+{
+    /**
+     * Invalid callback
+     */
+    const INVALID_CALLBACK = 'callbackInvalid';
+
+    /**
+     * Invalid value
+     */
+    const INVALID_VALUE = 'callbackValue';
+
+    /**
+     * Validation failure message template definitions
+     *
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::INVALID_VALUE    => "'%value%' is not valid",
+        self::INVALID_CALLBACK => "An exception has been raised within the callback",
+    );
+
+    /**
+     * Callback in a call_user_func format
+     *
+     * @var string|array
+     */
+    protected $_callback = null;
+
+    /**
+     * Extra arguments appended to the callback's argument list
+     *
+     * @var array
+     */
+    protected $_options = array();
+
+    /**
+     * Sets validator options
+     *
+     * Accepts either a callable, or an array with a 'callback' key and an
+     * optional 'options' key.
+     *
+     * @param  string|array $callback
+     * @return void
+     * @throws Zend_Validate_Exception If no callback ends up registered
+     */
+    public function __construct($callback = null)
+    {
+        if (is_callable($callback)) {
+            $this->setCallback($callback);
+        } elseif (is_array($callback)) {
+            if (isset($callback['callback'])) {
+                $this->setCallback($callback['callback']);
+            }
+            if (isset($callback['options'])) {
+                $this->setOptions($callback['options']);
+            }
+        }
+
+        if (null === $this->getCallback()) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('No callback registered');
+        }
+    }
+
+    /**
+     * Returns the set callback
+     *
+     * @return mixed
+     */
+    public function getCallback()
+    {
+        return $this->_callback;
+    }
+
+    /**
+     * Sets the callback
+     *
+     * @param  string|array $callback
+     * @return Zend_Validate_Callback Provides a fluent interface
+     * @throws Zend_Validate_Exception If $callback is not callable
+     */
+    public function setCallback($callback)
+    {
+        if (!is_callable($callback)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Invalid callback given');
+        }
+        $this->_callback = $callback;
+        return $this;
+    }
+
+    /**
+     * Returns the set options for the callback
+     *
+     * @return mixed
+     */
+    public function getOptions()
+    {
+        return $this->_options;
+    }
+
+    /**
+     * Sets options for the callback
+     *
+     * @param  mixed $options Cast to an array before being stored
+     * @return Zend_Validate_Callback Provides a fluent interface
+     */
+    public function setOptions($options)
+    {
+        $this->_options = (array) $options;
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if the set callback returns a truthy result
+     * for the provided $value. An exception thrown by the callback is
+     * reported as a validation failure (INVALID_CALLBACK).
+     *
+     * @param  mixed $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        $this->_setValue($value);
+
+        // The callback receives every argument passed to isValid() (the value
+        // first, then any extra arguments), followed by the configured options.
+        $args      = func_get_args();
+        $arguments = array_merge($args, $this->getOptions());
+        $callback  = $this->getCallback();
+
+        try {
+            if (!call_user_func_array($callback, $arguments)) {
+                $this->_error(self::INVALID_VALUE);
+                return false;
+            }
+        } catch (Exception $e) {
+            $this->_error(self::INVALID_CALLBACK);
+            return false;
+        }
+
+        return true;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/EmailAddress.php

@@ -0,0 +1,560 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: EmailAddress.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @see Zend_Validate_Hostname
+ */
+require_once 'Zend/Validate/Hostname.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_EmailAddress extends Zend_Validate_Abstract
+{
+    const INVALID            = 'emailAddressInvalid';
+    const INVALID_FORMAT     = 'emailAddressInvalidFormat';
+    const INVALID_HOSTNAME   = 'emailAddressInvalidHostname';
+    const INVALID_MX_RECORD  = 'emailAddressInvalidMxRecord';
+    const INVALID_SEGMENT    = 'emailAddressInvalidSegment';
+    const DOT_ATOM           = 'emailAddressDotAtom';
+    const QUOTED_STRING      = 'emailAddressQuotedString';
+    const INVALID_LOCAL_PART = 'emailAddressInvalidLocalPart';
+    const LENGTH_EXCEEDED    = 'emailAddressLengthExceeded';
+
+    /**
+     * Validation failure message template definitions
+     *
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::INVALID            => "Invalid type given. String expected",
+        self::INVALID_FORMAT     => "'%value%' is no valid email address in the basic format local-part@hostname",
+        self::INVALID_HOSTNAME   => "'%hostname%' is no valid hostname for email address '%value%'",
+        self::INVALID_MX_RECORD  => "'%hostname%' does not appear to have a valid MX record for the email address '%value%'",
+        self::INVALID_SEGMENT    => "'%hostname%' is not in a routable network segment. The email address '%value%' should not be resolved from public network",
+        self::DOT_ATOM           => "'%localPart%' can not be matched against dot-atom format",
+        self::QUOTED_STRING      => "'%localPart%' can not be matched against quoted-string format",
+        self::INVALID_LOCAL_PART => "'%localPart%' is no valid local part for email address '%value%'",
+        self::LENGTH_EXCEEDED    => "'%value%' exceeds the allowed length",
+    );
+
+    /**
+     * Reserved IPv4 networks in CIDR notation, keyed by their first octet.
+     * An entry is either a single network or a list of networks.
+     *
+     * @see http://en.wikipedia.org/wiki/IPv4
+     * @var array
+     */
+    protected $_invalidIp = array(
+        '0'   => '0.0.0.0/8',
+        '10'  => '10.0.0.0/8',
+        '127' => '127.0.0.0/8',
+        '128' => '128.0.0.0/16',
+        '169' => '169.254.0.0/16',
+        '172' => '172.16.0.0/12',
+        '191' => '191.255.0.0/16',
+        '192' => array(
+            '192.0.0.0/24',
+            '192.0.2.0/24',
+            '192.88.99.0/24',
+            '192.168.0.0/16'
+        ),
+        '198' => '198.18.0.0/15',
+        '223' => '223.255.255.0/24',
+        '224' => '224.0.0.0/4',
+        '240' => '240.0.0.0/4'
+    );
+
+    /**
+     * Additional variables available for validation failure messages
+     *
+     * @var array
+     */
+    protected $_messageVariables = array(
+        'hostname'  => '_hostname',
+        'localPart' => '_localPart'
+    );
+
+    /**
+     * Hostname part of the address currently being validated
+     *
+     * @var string
+     */
+    protected $_hostname;
+
+    /**
+     * Local part of the address currently being validated
+     *
+     * @var string
+     */
+    protected $_localPart;
+
+    /**
+     * Internal options array
+     */
+    protected $_options = array(
+        'mx'       => false,
+        'deep'     => false,
+        'domain'   => true,
+        'allow'    => Zend_Validate_Hostname::ALLOW_DNS,
+        'hostname' => null
+    );
+
+    /**
+     * Instantiates hostname validator for local use
+     *
+     * The following option keys are supported:
+     * 'hostname' => A hostname validator, see Zend_Validate_Hostname
+     * 'allow'    => Options for the hostname validator, see Zend_Validate_Hostname::ALLOW_*
+     * 'mx'       => If MX check should be enabled, boolean
+     * 'deep'     => If a deep MX check should be done, boolean
+     * 'domain'   => If the hostname part should be checked at all, boolean
+     * 'messages' => Message templates, passed to setMessages()
+     *
+     * For backward compatibility the positional form
+     * ($allow, $mx, $hostnameValidator) is accepted as well.
+     *
+     * @param array|Zend_Config $options OPTIONAL
+     * @return void
+     */
+    public function __construct($options = array())
+    {
+        if ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        } else if (!is_array($options)) {
+            // Positional arguments: map them onto the option keys
+            $options = func_get_args();
+            $temp    = array();
+            $temp['allow'] = array_shift($options);
+            if (!empty($options)) {
+                $temp['mx'] = array_shift($options);
+            }
+
+            if (!empty($options)) {
+                $temp['hostname'] = array_shift($options);
+            }
+
+            $options = $temp;
+        }
+
+        // Fill in defaults for every key that was not given
+        $options += $this->_options;
+        $this->setOptions($options);
+    }
+
+    /**
+     * Returns all set Options
+     *
+     * @return array
+     */
+    public function getOptions()
+    {
+        return $this->_options;
+    }
+
+    /**
+     * Set options for the email validator
+     *
+     * @param array $options
+     * @return Zend_Validate_EmailAddress Provides a fluent interface
+     */
+    public function setOptions(array $options = array())
+    {
+        // The hostname validator must be in place before messages are applied:
+        // setMessage() forwards unknown message keys to it, which would be a
+        // call on null during construction otherwise.
+        if (array_key_exists('hostname', $options)) {
+            if (array_key_exists('allow', $options)) {
+                $this->setHostnameValidator($options['hostname'], $options['allow']);
+            } else {
+                $this->setHostnameValidator($options['hostname']);
+            }
+        }
+
+        if (array_key_exists('messages', $options)) {
+            $this->setMessages($options['messages']);
+        }
+
+        if (array_key_exists('mx', $options)) {
+            $this->setValidateMx($options['mx']);
+        }
+
+        if (array_key_exists('deep', $options)) {
+            $this->setDeepMxCheck($options['deep']);
+        }
+
+        if (array_key_exists('domain', $options)) {
+            $this->setDomainCheck($options['domain']);
+        }
+
+        return $this;
+    }
+
+    /**
+     * Sets the validation failure message template for a particular key
+     * Adds the ability to set messages to the attached hostname validator
+     *
+     * @param  string $messageString
+     * @param  string $messageKey     OPTIONAL
+     * @return Zend_Validate_Abstract Provides a fluent interface
+     * @throws Zend_Validate_Exception
+     */
+    public function setMessage($messageString, $messageKey = null)
+    {
+        $messageKeys = $messageKey;
+        if ($messageKey === null) {
+            // No key given: address the first message template
+            $keys = array_keys($this->_messageTemplates);
+            $messageKeys = current($keys);
+        }
+
+        // Keys unknown to this validator are forwarded to the hostname validator
+        if (!isset($this->_messageTemplates[$messageKeys])) {
+            $this->_options['hostname']->setMessage($messageString, $messageKey);
+        }
+
+        $this->_messageTemplates[$messageKeys] = $messageString;
+        return $this;
+    }
+
+    /**
+     * Returns the set hostname validator
+     *
+     * @return Zend_Validate_Hostname
+     */
+    public function getHostnameValidator()
+    {
+        return $this->_options['hostname'];
+    }
+
+    /**
+     * Sets the hostname validator; a new one is created from $allow when none is given
+     *
+     * @param Zend_Validate_Hostname $hostnameValidator OPTIONAL
+     * @param int                    $allow             OPTIONAL
+     * @return Zend_Validate_EmailAddress Provides a fluent interface
+     */
+    public function setHostnameValidator(Zend_Validate_Hostname $hostnameValidator = null, $allow = Zend_Validate_Hostname::ALLOW_DNS)
+    {
+        if (!$hostnameValidator) {
+            $hostnameValidator = new Zend_Validate_Hostname($allow);
+        }
+
+        $this->_options['hostname'] = $hostnameValidator;
+        $this->_options['allow']    = $allow;
+        return $this;
+    }
+
+    /**
+     * Whether MX checking via getmxrr is supported or not
+     *
+     * This currently only works on UNIX systems
+     *
+     * @return boolean
+     */
+    public function validateMxSupported()
+    {
+        return function_exists('getmxrr');
+    }
+
+    /**
+     * Returns the set validateMx option
+     *
+     * @return boolean
+     */
+    public function getValidateMx()
+    {
+        return $this->_options['mx'];
+    }
+
+    /**
+     * Set whether we check for a valid MX record via DNS
+     *
+     * This only applies when DNS hostnames are validated
+     *
+     * @param boolean $mx Set allowed to true to validate for MX records, and false to not validate them
+     * @return Zend_Validate_EmailAddress Provides a fluent interface
+     * @throws Zend_Validate_Exception If MX checking is requested but getmxrr() is unavailable
+     */
+    public function setValidateMx($mx)
+    {
+        if ((bool) $mx && !$this->validateMxSupported()) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('MX checking not available on this system');
+        }
+
+        $this->_options['mx'] = (bool) $mx;
+        return $this;
+    }
+
+    /**
+     * Returns the set deepMxCheck option
+     *
+     * @return boolean
+     */
+    public function getDeepMxCheck()
+    {
+        return $this->_options['deep'];
+    }
+
+    /**
+     * Set whether we check MX record should be a deep validation
+     *
+     * @param boolean $deep Set deep to true to perform a deep validation process for MX records
+     * @return Zend_Validate_EmailAddress Provides a fluent interface
+     */
+    public function setDeepMxCheck($deep)
+    {
+        $this->_options['deep'] = (bool) $deep;
+        return $this;
+    }
+
+    /**
+     * Returns the set domainCheck option
+     *
+     * @return boolean
+     */
+    public function getDomainCheck()
+    {
+        return $this->_options['domain'];
+    }
+
+    /**
+     * Sets if the domain should also be checked
+     * or only the local part of the email address
+     *
+     * @param boolean $domain
+     * @return Zend_Validate_EmailAddress Provides a fluent interface
+     */
+    public function setDomainCheck($domain = true)
+    {
+        $this->_options['domain'] = (boolean) $domain;
+        return $this;
+    }
+
+    /**
+     * Returns if the given host is reserved
+     *
+     * A host name is resolved with gethostbyname() first. The resulting IPv4
+     * address is reserved when its first octet is 224 or above, or when it
+     * lies inside one of the networks listed in $_invalidIp for its first
+     * octet. A host that cannot be resolved to an IPv4 address is reported as
+     * not reserved.
+     *
+     * @param string $host
+     * @return boolean
+     */
+    private function _isReserved($host)
+    {
+        if (!preg_match('/^([0-9]{1,3}\.){3}[0-9]{1,3}$/', $host)) {
+            // gethostbyname() returns its argument unchanged on failure
+            $host = gethostbyname($host);
+        }
+
+        $octet = explode('.', $host);
+        if ((int) $octet[0] >= 224) {
+            // 224.0.0.0/4 and 240.0.0.0/4 cover every first octet from 224 upwards
+            return true;
+        }
+
+        if (!array_key_exists($octet[0], $this->_invalidIp)) {
+            return false;
+        }
+
+        $address = ip2long($host);
+        if ($address === false) {
+            return false;
+        }
+
+        foreach ((array) $this->_invalidIp[$octet[0]] as $subnetData) {
+            $parts   = explode('/', $subnetData, 2);
+            $network = ip2long($parts[0]);
+            if ($network === false) {
+                continue;
+            }
+
+            // Compare only the network prefix of both addresses
+            $bits = isset($parts[1]) ? (int) $parts[1] : 32;
+            $mask = -1 << (32 - $bits);
+            if (($address & $mask) === ($network & $mask)) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Internal method to validate the local part of the email address
+     *
+     * Registers DOT_ATOM, QUOTED_STRING and INVALID_LOCAL_PART errors when the
+     * local part matches neither format.
+     *
+     * @return boolean
+     */
+    private function _validateLocalPart()
+    {
+        // First try to match the local part on the common dot-atom format
+        $result = false;
+
+        // Dot-atom characters are: 1*atext *("." 1*atext)
+        // atext: ALPHA / DIGIT / and "!", "#", "$", "%", "&", "'", "*",
+        //        "+", "-", "/", "=", "?", "^", "_", "`", "{", "|", "}", "~"
+        $atext = 'a-zA-Z0-9\x21\x23\x24\x25\x26\x27\x2a\x2b\x2d\x2f\x3d\x3f\x5e\x5f\x60\x7b\x7c\x7d\x7e';
+        if (preg_match('/^[' . $atext . ']+(\x2e+[' . $atext . ']+)*$/', $this->_localPart)) {
+            $result = true;
+        } else {
+            // Try quoted string format
+
+            // Quoted-string characters are: DQUOTE *([FWS] qtext/quoted-pair) [FWS] DQUOTE
+            // qtext: Non white space controls, and the rest of the US-ASCII characters not
+            //   including "\" or the quote character
+            $noWsCtl = '\x01-\x08\x0b\x0c\x0e-\x1f\x7f';
+            $qtext   = $noWsCtl . '\x21\x23-\x5b\x5d-\x7e';
+            $ws      = '\x20\x09';
+            // The trailing optional whitespace class is built by concatenation;
+            // inside a single-quoted string "$ws" would be taken literally.
+            if (preg_match('/^\x22([' . $ws . $qtext . '])*[' . $ws . ']?\x22$/', $this->_localPart)) {
+                $result = true;
+            } else {
+                $this->_error(self::DOT_ATOM);
+                $this->_error(self::QUOTED_STRING);
+                $this->_error(self::INVALID_LOCAL_PART);
+            }
+        }
+
+        return $result;
+    }
+
+    /**
+     * Internal method to validate the servers MX records
+     *
+     * With the 'deep' option enabled (and checkdnsrr() available), at least
+     * one MX host must be non-reserved and have an A, AAAA or A6 record.
+     *
+     * @return boolean
+     */
+    private function _validateMXRecords()
+    {
+        $mxHosts = array();
+        $result = getmxrr($this->_hostname, $mxHosts);
+        if (!$result) {
+            $this->_error(self::INVALID_MX_RECORD);
+        } else if ($this->_options['deep'] && function_exists('checkdnsrr')) {
+            $validAddress = false;
+            $reserved     = true;
+            foreach ($mxHosts as $hostname) {
+                $res = $this->_isReserved($hostname);
+                if (!$res) {
+                    $reserved = false;
+                }
+
+                if (!$res
+                    && (checkdnsrr($hostname, "A")
+                    || checkdnsrr($hostname, "AAAA")
+                    || checkdnsrr($hostname, "A6"))) {
+                    $validAddress = true;
+                    break;
+                }
+            }
+
+            if (!$validAddress) {
+                $result = false;
+                // Every MX host reserved: report the segment, otherwise the record
+                if ($reserved) {
+                    $this->_error(self::INVALID_SEGMENT);
+                } else {
+                    $this->_error(self::INVALID_MX_RECORD);
+                }
+            }
+        }
+
+        return $result;
+    }
+
+    /**
+     * Internal method to validate the hostname part of the email address
+     *
+     * @return boolean
+     */
+    private function _validateHostnamePart()
+    {
+        $hostname = $this->_options['hostname']->setTranslator($this->getTranslator())
+                         ->isValid($this->_hostname);
+        if (!$hostname) {
+            $this->_error(self::INVALID_HOSTNAME);
+
+            // Get messages and errors from hostnameValidator
+            foreach ($this->_options['hostname']->getMessages() as $code => $message) {
+                $this->_messages[$code] = $message;
+            }
+
+            foreach ($this->_options['hostname']->getErrors() as $error) {
+                $this->_errors[] = $error;
+            }
+        } else if ($this->_options['mx']) {
+            // MX check on hostname
+            $hostname = $this->_validateMXRecords();
+        }
+
+        return $hostname;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if $value is a valid email address
+     * according to RFC2822
+     *
+     * @link   http://www.ietf.org/rfc/rfc2822.txt RFC2822
+     * @link   http://www.columbia.edu/kermit/ascii.html US-ASCII characters
+     * @param  string $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        if (!is_string($value)) {
+            $this->_error(self::INVALID);
+            return false;
+        }
+
+        $matches = array();
+        $length  = true;
+        $this->_setValue($value);
+
+        // Split email address up and disallow '..'
+        if ((strpos($value, '..') !== false) ||
+            (!preg_match('/^(.+)@([^@]+)$/', $value, $matches))) {
+            $this->_error(self::INVALID_FORMAT);
+            return false;
+        }
+
+        $this->_localPart = $matches[1];
+        $this->_hostname  = $matches[2];
+
+        if ((strlen($this->_localPart) > 64) || (strlen($this->_hostname) > 255)) {
+            $length = false;
+            $this->_error(self::LENGTH_EXCEEDED);
+        }
+
+        // Match hostname part; counts as valid when the domain check is disabled
+        $hostname = true;
+        if ($this->_options['domain']) {
+            $hostname = $this->_validateHostnamePart();
+        }
+
+        $local = $this->_validateLocalPart();
+
+        // Valid only if the local part, the length and the hostname all passed
+        if ($local && $length && $hostname) {
+            return true;
+        }
+
+        return false;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Interface.php

@@ -0,0 +1,54 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Interface.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+interface Zend_Validate_Interface
+{
+    /**
+     * Returns true if and only if $value meets the validation requirements
+     *
+     * If $value fails validation, then this method returns false, and
+     * getMessages() will return an array of messages that explain why the
+     * validation failed.
+     *
+     * @param  mixed $value The value to validate
+     * @return boolean True when $value is valid, false otherwise
+     * @throws Zend_Validate_Exception If validation of $value is impossible
+     */
+    public function isValid($value);
+
+    /**
+     * Returns an array of messages that explain why the most recent isValid()
+     * call returned false. The array keys are validation failure message identifiers,
+     * and the array values are the corresponding human-readable message strings.
+     *
+     * If isValid() was never called or if the most recent isValid() call
+     * returned true, then this method returns an empty array.
+     *
+     * @return array Message identifier => human-readable message
+     */
+    public function getMessages();
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Abstract.php

@@ -0,0 +1,456 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Abstract.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Interface
+ */
+require_once 'Zend/Validate/Interface.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+abstract class Zend_Validate_Abstract implements Zend_Validate_Interface
+{
+    /**
+     * The value to be validated
+     *
+     * @var mixed
+     */
+    protected $_value;
+
+    /**
+     * Additional variables available for validation failure messages
+     *
+     * @var array
+     */
+    protected $_messageVariables = array();
+
+    /**
+     * Validation failure message template definitions
+     *
+     * @var array
+     */
+    protected $_messageTemplates = array();
+
+    /**
+     * Array of validation failure messages
+     *
+     * @var array
+     */
+    protected $_messages = array();
+
+    /**
+     * Flag indicating whether or not value should be obfuscated in error
+     * messages
+     * @var bool
+     */
+    protected $_obscureValue = false;
+
+    /**
+     * Array of validation failure message codes
+     *
+     * @var array
+     * @deprecated Since 1.5.0
+     */
+    protected $_errors = array();
+
+    /**
+     * Translation object
+     * @var Zend_Translate
+     */
+    protected $_translator;
+
+    /**
+     * Default translation object for all validate objects
+     * @var Zend_Translate
+     */
+    protected static $_defaultTranslator;
+
+    /**
+     * Is translation disabled?
+     * @var Boolean
+     */
+    protected $_translatorDisabled = false;
+
+    /**
+     * Limits the maximum returned length of an error message (-1 means no limit)
+     *
+     * @var Integer
+     */
+    protected static $_messageLength = -1;
+
+    /**
+     * Returns array of validation failure messages
+     *
+     * @return array
+     */
+    public function getMessages()
+    {
+        return $this->_messages;
+    }
+
+    /**
+     * Returns an array of the names of variables that are used in constructing validation failure messages
+     *
+     * @return array
+     */
+    public function getMessageVariables()
+    {
+        return array_keys($this->_messageVariables);
+    }
+
+    /**
+     * Returns the message templates from the validator
+     *
+     * @return array
+     */
+    public function getMessageTemplates()
+    {
+        return $this->_messageTemplates;
+    }
+
+    /**
+     * Sets the validation failure message template for a particular key
+     *
+     * @param  string $messageString
+     * @param  string $messageKey     OPTIONAL
+     * @return Zend_Validate_Abstract Provides a fluent interface
+     * @throws Zend_Validate_Exception
+     */
+    public function setMessage($messageString, $messageKey = null)
+    {
+        if ($messageKey === null) {
+            $keys = array_keys($this->_messageTemplates);
+            foreach($keys as $key) {
+                $this->setMessage($messageString, $key);
+            }
+            return $this;
+        }
+
+        if (!isset($this->_messageTemplates[$messageKey])) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception("No message template exists for key '$messageKey'");
+        }
+
+        $this->_messageTemplates[$messageKey] = $messageString;
+        return $this;
+    }
+
+    /**
+     * Sets validation failure message templates given as an array, where the array keys are the message keys,
+     * and the array values are the message template strings.
+     *
+     * @param  array $messages
+     * @return Zend_Validate_Abstract
+     */
+    public function setMessages(array $messages)
+    {
+        foreach ($messages as $key => $message) {
+            $this->setMessage($message, $key);
+        }
+        return $this;
+    }
+
+    /**
+     * Magic function returns the value of the requested property, if and only if it is the value or a
+     * message variable.
+     *
+     * @param  string $property
+     * @return mixed
+     * @throws Zend_Validate_Exception
+     */
+    public function __get($property)
+    {
+        if ($property == 'value') {
+            return $this->_value;
+        }
+        if (array_key_exists($property, $this->_messageVariables)) {
+            return $this->{$this->_messageVariables[$property]};
+        }
+        /**
+         * @see Zend_Validate_Exception
+         */
+        require_once 'Zend/Validate/Exception.php';
+        throw new Zend_Validate_Exception("No property exists by the name '$property'");
+    }
+
+    /**
+     * Constructs and returns a validation failure message with the given message key and value.
+     *
+     * Returns null if and only if $messageKey does not correspond to an existing template.
+     *
+     * If a translator is available and a translation exists for $messageKey,
+     * the translation will be used; otherwise the template text itself is
+     * handed to the translator.
+     *
+     * The placeholder %value% and every %name% registered in $_messageVariables
+     * are substituted, then the result is shortened to the configured maximum
+     * message length (see getMessageLength()), ending in '...' when the limit
+     * leaves room for it.
+     *
+     * @param  string $messageKey
+     * @param  mixed  $value      Objects without __toString() are rendered as "<class> object"
+     * @return string|null
+     */
+    protected function _createMessage($messageKey, $value)
+    {
+        if (!isset($this->_messageTemplates[$messageKey])) {
+            return null;
+        }
+
+        $message = $this->_messageTemplates[$messageKey];
+
+        if (null !== ($translator = $this->getTranslator())) {
+            if ($translator->isTranslated($messageKey)) {
+                $message = $translator->translate($messageKey);
+            } else {
+                $message = $translator->translate($message);
+            }
+        }
+
+        if (is_object($value)) {
+            if (method_exists($value, '__toString')) {
+                $value = $value->__toString();
+            } else {
+                $value = get_class($value) . ' object';
+            }
+        } else {
+            $value = (string) $value;
+        }
+
+        if ($this->getObscureValue()) {
+            $value = str_repeat('*', strlen($value));
+        }
+
+        $message = str_replace('%value%', $value, $message);
+        foreach ($this->_messageVariables as $ident => $property) {
+            $message = str_replace("%$ident%", (string) $this->$property, $message);
+        }
+
+        $length = self::getMessageLength();
+        if (($length > -1) && (strlen($message) > $length)) {
+            if ($length >= 3) {
+                $message = substr($message, 0, $length - 3) . '...';
+            } else {
+                // No room for the ellipsis: a negative substr() length would cut
+                // from the end and overshoot the limit, so truncate hard instead.
+                $message = substr($message, 0, $length);
+            }
+        }
+
+        return $message;
+    }
+
+    /**
+     * Records a validation failure: appends the message key to the error
+     * list and stores the rendered message under that key.
+     *
+     * @param  string $messageKey Template key; null selects the first key of $_messageTemplates
+     * @param  string $value      OPTIONAL Value shown in the message; null falls back to the value under validation
+     * @return void
+     */
+    protected function _error($messageKey, $value = null)
+    {
+        if ($messageKey === null) {
+            $keys = array_keys($this->_messageTemplates);
+            $messageKey = current($keys);
+        }
+        if ($value === null) {
+            $value = $this->_value;
+        }
+        $this->_errors[]              = $messageKey;
+        $this->_messages[$messageKey] = $this->_createMessage($messageKey, $value);
+    }
+
+    /**
+     * Sets the value to be validated and clears the messages and errors arrays
+     *
+     * @param  mixed $value
+     * @return void
+     */
+    protected function _setValue($value)
+    {
+        $this->_value    = $value;
+        $this->_messages = array();
+        $this->_errors   = array();
+    }
+
+    /**
+     * Returns array of validation failure message codes
+     *
+     * @return array
+     * @deprecated Since 1.5.0
+     */
+    public function getErrors()
+    {
+        return $this->_errors;
+    }
+
+    /**
+     * Set flag indicating whether or not value should be obfuscated in messages
+     *
+     * @param  bool $flag
+     * @return Zend_Validate_Abstract
+     */
+    public function setObscureValue($flag)
+    {
+        $this->_obscureValue = (bool) $flag;
+        return $this;
+    }
+
+    /**
+     * Retrieve flag indicating whether or not value should be obfuscated in
+     * messages
+     *
+     * @return bool
+     */
+    public function getObscureValue()
+    {
+        return $this->_obscureValue;
+    }
+
+    /**
+     * Set translation object
+     *
+     * @param  Zend_Translate|Zend_Translate_Adapter|null $translator
+     * @return Zend_Validate_Abstract
+     */
+    public function setTranslator($translator = null)
+    {
+        if ((null === $translator) || ($translator instanceof Zend_Translate_Adapter)) {
+            $this->_translator = $translator;
+        } elseif ($translator instanceof Zend_Translate) {
+            $this->_translator = $translator->getAdapter();
+        } else {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Invalid translator specified');
+        }
+        return $this;
+    }
+
+    /**
+     * Return translation object
+     *
+     * @return Zend_Translate_Adapter|null
+     */
+    public function getTranslator()
+    {
+        if ($this->translatorIsDisabled()) {
+            return null;
+        }
+
+        if (null === $this->_translator) {
+            return self::getDefaultTranslator();
+        }
+
+        return $this->_translator;
+    }
+
+    /**
+     * Does this validator have its own specific translator?
+     *
+     * @return bool
+     */
+    public function hasTranslator()
+    {
+        return (bool)$this->_translator;
+    }
+
+    /**
+     * Set default translation object for all validate objects
+     *
+     * @param  Zend_Translate|Zend_Translate_Adapter|null $translator
+     * @return void
+     */
+    public static function setDefaultTranslator($translator = null)
+    {
+        if ((null === $translator) || ($translator instanceof Zend_Translate_Adapter)) {
+            self::$_defaultTranslator = $translator;
+        } elseif ($translator instanceof Zend_Translate) {
+            self::$_defaultTranslator = $translator->getAdapter();
+        } else {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Invalid translator specified');
+        }
+    }
+
+    /**
+     * Get default translation object for all validate objects
+     *
+     * An explicitly set default wins. Without one, the 'Zend_Translate' entry
+     * of Zend_Registry is consulted; its adapter is returned but not stored
+     * as the default.
+     *
+     * @return Zend_Translate_Adapter|null
+     */
+    public static function getDefaultTranslator()
+    {
+        if (null !== self::$_defaultTranslator) {
+            return self::$_defaultTranslator;
+        }
+
+        require_once 'Zend/Registry.php';
+        if (!Zend_Registry::isRegistered('Zend_Translate')) {
+            return self::$_defaultTranslator;
+        }
+
+        $registered = Zend_Registry::get('Zend_Translate');
+        if ($registered instanceof Zend_Translate_Adapter) {
+            return $registered;
+        }
+        if ($registered instanceof Zend_Translate) {
+            return $registered->getAdapter();
+        }
+
+        // Registry entry is neither a translator nor an adapter
+        return self::$_defaultTranslator;
+    }
+
+    /**
+     * Is there a default translation object set?
+     *
+     * @return boolean
+     */
+    public static function hasDefaultTranslator()
+    {
+        return (bool)self::$_defaultTranslator;
+    }
+
+    /**
+     * Indicate whether or not translation should be disabled
+     *
+     * @param  bool $flag
+     * @return Zend_Validate_Abstract
+     */
+    public function setDisableTranslator($flag)
+    {
+        $this->_translatorDisabled = (bool) $flag;
+        return $this;
+    }
+
+    /**
+     * Is translation disabled?
+     *
+     * @return bool
+     */
+    public function translatorIsDisabled()
+    {
+        return $this->_translatorDisabled;
+    }
+
+    /**
+     * Returns the maximum allowed message length
+     *
+     * @return integer
+     */
+    public static function getMessageLength()
+    {
+        return self::$_messageLength;
+    }
+
+    /**
+     * Sets the maximum allowed message length
+     *
+     * @param integer $length
+     */
+    public static function setMessageLength($length = -1)
+    {
+        self::$_messageLength = $length;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/CreditCard.php

@@ -0,0 +1,317 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: CreditCard.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_CreditCard extends Zend_Validate_Abstract
+{
+    /**
+     * Detected CCI list
+     *
+     * @var string
+     */
+    const ALL              = 'All';
+    const AMERICAN_EXPRESS = 'American_Express';
+    const UNIONPAY         = 'Unionpay';
+    const DINERS_CLUB      = 'Diners_Club';
+    const DINERS_CLUB_US   = 'Diners_Club_US';
+    const DISCOVER         = 'Discover';
+    const JCB              = 'JCB';
+    const LASER            = 'Laser';
+    const MAESTRO          = 'Maestro';
+    const MASTERCARD       = 'Mastercard';
+    const SOLO             = 'Solo';
+    const VISA             = 'Visa';
+
+    const CHECKSUM       = 'creditcardChecksum';
+    const CONTENT        = 'creditcardContent';
+    const INVALID        = 'creditcardInvalid';
+    const LENGTH         = 'creditcardLength';
+    const PREFIX         = 'creditcardPrefix';
+    const SERVICE        = 'creditcardService';
+    const SERVICEFAILURE = 'creditcardServiceFailure';
+
+    /**
+     * Validation failure message template definitions
+     *
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::CHECKSUM       => "'%value%' seems to contain an invalid checksum",
+        self::CONTENT        => "'%value%' must contain only digits",
+        self::INVALID        => "Invalid type given. String expected",
+        self::LENGTH         => "'%value%' contains an invalid amount of digits",
+        self::PREFIX         => "'%value%' is not from an allowed institute",
+        self::SERVICE        => "'%value%' seems to be an invalid creditcard number",
+        self::SERVICEFAILURE => "An exception has been raised while validating '%value%'",
+    );
+
+    /**
+     * List of allowed card number lengths, keyed by institute
+     *
+     * @var array
+     */
+    protected $_cardLength = array(
+        self::AMERICAN_EXPRESS => array(15),
+        self::DINERS_CLUB      => array(14),
+        self::DINERS_CLUB_US   => array(16),
+        self::DISCOVER         => array(16),
+        self::JCB              => array(16),
+        self::LASER            => array(16, 17, 18, 19),
+        self::MAESTRO          => array(12, 13, 14, 15, 16, 17, 18, 19),
+        self::MASTERCARD       => array(16),
+        self::SOLO             => array(16, 18, 19),
+        self::UNIONPAY         => array(16, 17, 18, 19),
+        self::VISA             => array(16),
+    );
+
+    /**
+     * List of accepted card number prefixes, keyed by institute
+     *
+     * @var array
+     */
+    protected $_cardType = array(
+        self::AMERICAN_EXPRESS => array('34', '37'),
+        self::DINERS_CLUB      => array('300', '301', '302', '303', '304', '305', '36'),
+        self::DINERS_CLUB_US   => array('54', '55'),
+        self::DISCOVER         => array('6011', '622126', '622127', '622128', '622129', '62213',
+                                        '62214', '62215', '62216', '62217', '62218', '62219',
+                                        '6222', '6223', '6224', '6225', '6226', '6227', '6228',
+                                        '62290', '62291', '622920', '622921', '622922', '622923',
+                                        '622924', '622925', '644', '645', '646', '647', '648',
+                                        '649', '65'),
+        self::JCB              => array('3528', '3529', '353', '354', '355', '356', '357', '358'),
+        self::LASER            => array('6304', '6706', '6771', '6709'),
+        self::MAESTRO          => array('5018', '5020', '5038', '6304', '6759', '6761', '6763'),
+        self::MASTERCARD       => array('51', '52', '53', '54', '55'),
+        self::SOLO             => array('6334', '6767'),
+        self::UNIONPAY         => array('622126', '622127', '622128', '622129', '62213', '62214',
+                                        '62215', '62216', '62217', '62218', '62219', '6222', '6223',
+                                        '6224', '6225', '6226', '6227', '6228', '62290', '62291',
+                                        '622920', '622921', '622922', '622923', '622924', '622925'),
+        self::VISA             => array('4'),
+    );
+
+    /**
+     * CCIs which are accepted by validation
+     *
+     * @var array
+     */
+    protected $_type = array();
+
+    /**
+     * Service callback for additional validation
+     *
+     * @var callback
+     */
+    protected $_service;
+
+    /**
+     * Constructor
+     *
+     * Accepts a Zend_Config, an options array with the keys 'type' and
+     * 'service', or the same two values as positional arguments (type first,
+     * then service). When no 'type' is given, self::ALL is used.
+     *
+     * @param string|array|Zend_Config $options OPTIONAL Options, or the type of CCI to allow
+     */
+    public function __construct($options = array())
+    {
+        if ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        } else if (!is_array($options)) {
+            // Positional form: __construct($type [, $service])
+            $options = func_get_args();
+            $temp['type'] = array_shift($options);
+            if (!empty($options)) {
+                $temp['service'] = array_shift($options);
+            }
+
+            $options = $temp;
+        }
+
+        if (!array_key_exists('type', $options)) {
+            $options['type'] = self::ALL;
+        }
+
+        $this->setType($options['type']);
+        if (array_key_exists('service', $options)) {
+            $this->setService($options['service']);
+        }
+    }
+
+    /**
+     * Returns a list of accepted CCIs
+     *
+     * @return array
+     */
+    public function getType()
+    {
+        return $this->_type;
+    }
+
+    /**
+     * Sets CCIs which are accepted by validation
+     *
+     * @param string|array $type Type to allow for validation
+     * @return Zend_Validate_CreditCard Provides a fluid interface
+     */
+    public function setType($type)
+    {
+        $this->_type = array();
+        return $this->addType($type);
+    }
+
+    /**
+     * Adds a CCI to be accepted by validation
+     *
+     * Names are matched case-insensitively against the institutes this
+     * validator has both a length list and a prefix list for, and are stored
+     * in their canonical spelling. Unknown names are ignored, so isValid()
+     * never looks up an institute that has no definition. Passing self::ALL
+     * enables every institute listed in $_cardLength.
+     *
+     * @param string|array $type Type to allow for validation
+     * @return Zend_Validate_CreditCard Provides a fluid interface
+     */
+    public function addType($type)
+    {
+        if (is_string($type)) {
+            $type = array($type);
+        }
+
+        foreach ($type as $typ) {
+            if (!is_string($typ)) {
+                continue;
+            }
+
+            if (strcasecmp($typ, self::ALL) === 0) {
+                $this->_type = array_keys($this->_cardLength);
+                continue;
+            }
+
+            foreach (array_keys($this->_cardLength) as $known) {
+                if (strcasecmp($typ, $known) !== 0) {
+                    continue;
+                }
+                if (isset($this->_cardType[$known]) && !in_array($known, $this->_type, true)) {
+                    $this->_type[] = $known;
+                }
+                break;
+            }
+        }
+
+        return $this;
+    }
+
+    /**
+     * Returns the actual set service
+     *
+     * @return callback
+     */
+    public function getService()
+    {
+        return $this->_service;
+    }
+
+    /**
+     * Sets a new callback for service validation
+     *
+     * @param  callback $service Any value accepted by is_callable()
+     * @return Zend_Validate_CreditCard Provides a fluid interface
+     * @throws Zend_Validate_Exception If $service is not callable
+     */
+    public function setService($service)
+    {
+        if (!is_callable($service)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Invalid callback given');
+        }
+
+        $this->_service = $service;
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if $value is a string of digits whose prefix
+     * and length match one of the accepted institutes, which follows the
+     * Luhn algorithm (mod-10 checksum) and which, when a service callback is
+     * set, is also accepted by that callback.
+     *
+     * @param  string $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        $this->_setValue($value);
+
+        if (!is_string($value)) {
+            $this->_error(self::INVALID, $value);
+            return false;
+        }
+
+        if (!ctype_digit($value)) {
+            $this->_error(self::CONTENT, $value);
+            return false;
+        }
+
+        $length = strlen($value);
+        $types  = $this->getType();
+        $foundp = false;
+        $foundl = false;
+        // Look for an accepted institute whose prefix AND length both match;
+        // $foundp stays true when only a prefix matched, so the length error
+        // can be reported separately below.
+        foreach ($types as $type) {
+            foreach ($this->_cardType[$type] as $prefix) {
+                if (substr($value, 0, strlen($prefix)) == $prefix) {
+                    $foundp = true;
+                    if (in_array($length, $this->_cardLength[$type])) {
+                        $foundl = true;
+                        break 2;
+                    }
+                }
+            }
+        }
+
+        if ($foundp == false){
+            $this->_error(self::PREFIX, $value);
+            return false;
+        }
+
+        if ($foundl == false) {
+            $this->_error(self::LENGTH, $value);
+            return false;
+        }
+
+        $sum    = 0;
+        $weight = 2;
+
+        // Luhn: walk from the second-to-last digit to the first, weighting
+        // digits alternately by 2 and 1 and adding the decimal digits of each
+        // product to the sum.
+        for ($i = $length - 2; $i >= 0; $i--) {
+            $digit = $weight * $value[$i];
+            $sum += floor($digit / 10) + $digit % 10;
+            $weight = $weight % 2 + 1;
+        }
+
+        // The last digit must equal the check digit derived from the sum
+        if ((10 - $sum % 10) % 10 != $value[$length - 1]) {
+            $this->_error(self::CHECKSUM, $value);
+            return false;
+        }
+
+        if (!empty($this->_service)) {
+            try {
+                require_once 'Zend/Validate/Callback.php';
+                $callback = new Zend_Validate_Callback($this->_service);
+                $callback->setOptions($this->_type);
+                if (!$callback->isValid($value)) {
+                    $this->_error(self::SERVICE, $value);
+                    return false;
+                }
+            } catch (Zend_Exception $e) {
+                $this->_error(self::SERVICEFAILURE, $value);
+                return false;
+            }
+        }
+
+        return true;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/InArray.php

@@ -0,0 +1,204 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: InArray.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_InArray extends Zend_Validate_Abstract
+{
+    const NOT_IN_ARRAY = 'notInArray';
+
+    /**
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::NOT_IN_ARRAY => "'%value%' was not found in the haystack",
+    );
+
+    /**
+     * Haystack of possible values
+     *
+     * @var array
+     */
+    protected $_haystack;
+
+    /**
+     * Whether a strict in_array() invocation is used
+     *
+     * @var boolean
+     */
+    protected $_strict = false;
+
+    /**
+     * Whether a recursive search should be done
+     *
+     * @var boolean
+     */
+    protected $_recursive = false;
+
+    /**
+     * Sets validator options
+     *
+     * Accepted forms:
+     *  - a Zend_Config, converted with toArray();
+     *  - an array with a 'haystack' key and optional 'strict' and
+     *    'recursive' keys;
+     *  - an array without a 'haystack' key, which is taken as the haystack
+     *    itself;
+     *  - two positional arguments: the haystack, then the strict flag.
+     *
+     * @param  array|Zend_Config $options
+     * @return void
+     * @throws Zend_Validate_Exception If $options is neither an array nor a Zend_Config
+     */
+    public function __construct($options)
+    {
+        if ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        } else if (!is_array($options)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Array expected as parameter');
+        } else {
+            $count = func_num_args();
+            $temp  = array();
+            if ($count > 1) {
+                // Positional form: __construct($haystack, $strict)
+                $temp['haystack'] = func_get_arg(0);
+                $temp['strict']   = func_get_arg(1);
+                $options = $temp;
+            } else {
+                $temp = func_get_arg(0);
+                if (!array_key_exists('haystack', $options)) {
+                    // Plain array: the argument is the haystack itself
+                    $options = array();
+                    $options['haystack'] = $temp;
+                } else {
+                    $options = $temp;
+                }
+            }
+        }
+
+        $this->setHaystack($options['haystack']);
+        if (array_key_exists('strict', $options)) {
+            $this->setStrict($options['strict']);
+        }
+
+        if (array_key_exists('recursive', $options)) {
+            $this->setRecursive($options['recursive']);
+        }
+    }
+
+    /**
+     * Returns the haystack option
+     *
+     * @return mixed
+     */
+    public function getHaystack()
+    {
+        return $this->_haystack;
+    }
+
+    /**
+     * Sets the haystack option
+     *
+     * @param  mixed $haystack
+     * @return Zend_Validate_InArray Provides a fluent interface
+     */
+    public function setHaystack(array $haystack)
+    {
+        $this->_haystack = $haystack;
+        return $this;
+    }
+
+    /**
+     * Returns the strict option
+     *
+     * @return boolean
+     */
+    public function getStrict()
+    {
+        return $this->_strict;
+    }
+
+    /**
+     * Sets the strict option
+     *
+     * @param  boolean $strict
+     * @return Zend_Validate_InArray Provides a fluent interface
+     */
+    public function setStrict($strict)
+    {
+        $this->_strict = (boolean) $strict;
+        return $this;
+    }
+
+    /**
+     * Returns the recursive option
+     *
+     * @return boolean
+     */
+    public function getRecursive()
+    {
+        return $this->_recursive;
+    }
+
+    /**
+     * Sets the recursive option
+     *
+     * @param  boolean $recursive
+     * @return Zend_Validate_InArray Provides a fluent interface
+     */
+    public function setRecursive($recursive)
+    {
+        $this->_recursive = (boolean) $recursive;
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if $value is contained in the haystack option.
+     * With the strict option the comparison is type-sensitive; with the
+     * recursive option nested arrays of the haystack are searched as well.
+     *
+     * @param  mixed $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        $this->_setValue($value);
+
+        if (!$this->getRecursive()) {
+            if (in_array($value, $this->_haystack, $this->_strict)) {
+                return true;
+            }
+        } else {
+            $flattened = new RecursiveIteratorIterator(new RecursiveArrayIterator($this->_haystack));
+            foreach ($flattened as $candidate) {
+                $matches = $this->_strict ? ($candidate === $value) : ($candidate == $value);
+                if ($matches) {
+                    return true;
+                }
+            }
+        }
+
+        $this->_error(self::NOT_IN_ARRAY);
+        return false;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Float.php

@@ -0,0 +1,134 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Float.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @see Zend_Locale_Format
+ */
+require_once 'Zend/Locale/Format.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Float extends Zend_Validate_Abstract
+{
+    const INVALID   = 'floatInvalid';
+    const NOT_FLOAT = 'notFloat';
+
+    /**
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::INVALID   => "Invalid type given. String, integer or float expected",
+        self::NOT_FLOAT => "'%value%' does not appear to be a float",
+    );
+
+    protected $_locale;
+
+    /**
+     * Constructor for the float validator
+     *
+     * @param string|Zend_Config|Zend_Locale $locale
+     */
+    public function __construct($locale = null)
+    {
+        if ($locale instanceof Zend_Config) {
+            $locale = $locale->toArray();
+        }
+
+        if (is_array($locale)) {
+            if (array_key_exists('locale', $locale)) {
+                $locale = $locale['locale'];
+            } else {
+                $locale = null;
+            }
+        }
+
+        if (empty($locale)) {
+            require_once 'Zend/Registry.php';
+            if (Zend_Registry::isRegistered('Zend_Locale')) {
+                $locale = Zend_Registry::get('Zend_Locale');
+            }
+        }
+
+        $this->setLocale($locale);
+    }
+
+    /**
+     * Returns the set locale
+     */
+    public function getLocale()
+    {
+        return $this->_locale;
+    }
+
+    /**
+     * Sets the locale to use
+     *
+     * @param string|Zend_Locale $locale
+     */
+    public function setLocale($locale = null)
+    {
+        require_once 'Zend/Locale.php';
+        $this->_locale = Zend_Locale::findLocale($locale);
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if $value is a floating-point value
+     *
+     * Native floats are accepted as they are; strings and integers are
+     * checked with Zend_Locale_Format::isFloat() for the configured locale.
+     *
+     * @param  string|integer|float $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        // Reset state before any return so getMessages() never reports
+        // failures left over from an earlier call.
+        $this->_setValue($value);
+
+        if (!is_string($value) && !is_int($value) && !is_float($value)) {
+            // Explicit empty placeholder: the rejected value may be an array,
+            // which cannot be cast to a string for the message.
+            $this->_error(self::INVALID, '');
+            return false;
+        }
+
+        if (is_float($value)) {
+            return true;
+        }
+
+        try {
+            if (!Zend_Locale_Format::isFloat($value, array('locale' => $this->_locale))) {
+                $this->_error(self::NOT_FLOAT);
+                return false;
+            }
+        } catch (Zend_Locale_Exception $e) {
+            $this->_error(self::NOT_FLOAT);
+            return false;
+        }
+
+        return true;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Ip.php

@@ -0,0 +1,191 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Ip.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Ip extends Zend_Validate_Abstract
+{
+    const INVALID        = 'ipInvalid';
+    const NOT_IP_ADDRESS = 'notIpAddress';
+
+    /**
+     * Validation failure message template definitions
+     *
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::INVALID        => "Invalid type given. String expected",
+        self::NOT_IP_ADDRESS => "'%value%' does not appear to be a valid IP address",
+    );
+
+    /**
+     * Internal options: which address families are accepted
+     *
+     * @var array
+     */
+    protected $_options = array(
+        'allowipv6' => true,
+        'allowipv4' => true
+    );
+
+    /**
+     * Sets validator options
+     *
+     * Accepts an options array, a Zend_Config, or the legacy positional form
+     * ($allowipv6[, $allowipv4]). Keys that are not given keep their defaults.
+     *
+     * @param  array|Zend_Config|boolean $options OPTIONAL Options to set, see the manual for all available options
+     * @throws Zend_Validate_Exception When both address families end up disabled
+     * @return void
+     */
+    public function __construct($options = array())
+    {
+        if ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        } else if (!is_array($options)) {
+            // Legacy positional call: first argument is allowipv6, optional second is allowipv4.
+            $arguments = func_get_args();
+            $options   = array('allowipv6' => array_shift($arguments));
+            if (!empty($arguments)) {
+                $options['allowipv4'] = array_shift($arguments);
+            }
+        }
+
+        $options += $this->_options;
+        $this->setOptions($options);
+    }
+
+    /**
+     * Returns all set options
+     *
+     * @return array
+     */
+    public function getOptions()
+    {
+        return $this->_options;
+    }
+
+    /**
+     * Sets the options for this validator
+     *
+     * The new combination is checked before anything is stored, so a rejected
+     * call leaves the previously configured options untouched.
+     *
+     * @param  array $options May contain the keys 'allowipv6' and 'allowipv4'
+     * @throws Zend_Validate_Exception When both address families would be disabled
+     * @return Zend_Validate_Ip Provides a fluent interface
+     */
+    public function setOptions($options)
+    {
+        $allowipv6 = $this->_options['allowipv6'];
+        $allowipv4 = $this->_options['allowipv4'];
+
+        if (array_key_exists('allowipv6', $options)) {
+            $allowipv6 = (bool) $options['allowipv6'];
+        }
+
+        if (array_key_exists('allowipv4', $options)) {
+            $allowipv4 = (bool) $options['allowipv4'];
+        }
+
+        if (!$allowipv4 && !$allowipv6) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Nothing to validate. Check your options');
+        }
+
+        // Commit only after the combination has been accepted.
+        $this->_options['allowipv6'] = $allowipv6;
+        $this->_options['allowipv4'] = $allowipv4;
+
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if $value is a valid IP address of at least
+     * one of the allowed address families
+     *
+     * @param  mixed $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        if (!is_string($value)) {
+            $this->_error(self::INVALID);
+            return false;
+        }
+
+        $this->_setValue($value);
+
+        if ($this->_options['allowipv4'] && $this->_validateIPv4($value)) {
+            return true;
+        }
+
+        if ($this->_options['allowipv6'] && $this->_validateIPv6($value)) {
+            return true;
+        }
+
+        $this->_error(self::NOT_IP_ADDRESS);
+        return false;
+    }
+
+    /**
+     * Validates an IPv4 address
+     *
+     * The value must survive an ip2long()/long2ip() round trip unchanged,
+     * which rejects shortened or otherwise non-canonical notations.
+     *
+     * @param  string $value
+     * @return boolean
+     */
+    protected function _validateIPv4($value) {
+        $ip2long = ip2long($value);
+        if ($ip2long === false) {
+            return false;
+        }
+
+        return $value === long2ip($ip2long);
+    }
+
+    /**
+     * Validates an IPv6 address
+     *
+     * @param  string $value Value to check against
+     * @return boolean True when $value is a valid ipv6 address
+     *                 False otherwise
+     */
+    protected function _validateIPv6($value) {
+        if (strlen($value) < 3) {
+            return $value === '::';
+        }
+
+        // Embedded IPv4 tail: validate it separately, then substitute two
+        // placeholder groups so the group counting below still applies.
+        if (strpos($value, '.') !== false) {
+            $lastcolon = strrpos($value, ':');
+            if (!$lastcolon || !$this->_validateIPv4(substr($value, $lastcolon + 1))) {
+                return false;
+            }
+
+            $value = substr($value, 0, $lastcolon) . ':0:0';
+        }
+
+        // Uncompressed form: exactly eight groups.
+        if (strpos($value, '::') === false) {
+            return preg_match('/\A(?:[a-f0-9]{1,4}:){7}[a-f0-9]{1,4}\z/i', $value) === 1;
+        }
+
+        $colonCount = substr_count($value, ':');
+        if ($colonCount < 8) {
+            return preg_match('/\A(?::|(?:[a-f0-9]{1,4}:)+):(?:(?:[a-f0-9]{1,4}:)*[a-f0-9]{1,4})?\z/i', $value) === 1;
+        }
+
+        // special case with ending or starting double colon
+        if ($colonCount == 8) {
+            return preg_match('/\A(?:::)?(?:[a-f0-9]{1,4}:){6}[a-f0-9]{1,4}(?:::)?\z/i', $value) === 1;
+        }
+
+        return false;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Alnum.php

@@ -0,0 +1,150 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Alnum.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Alnum extends Zend_Validate_Abstract
+{
+    const INVALID      = 'alnumInvalid';
+    const NOT_ALNUM    = 'notAlnum';
+    const STRING_EMPTY = 'alnumStringEmpty';
+
+    /**
+     * Whether to allow white space characters; off by default
+     *
+     * @var boolean
+     * @deprecated
+     */
+    public $allowWhiteSpace;
+
+    /**
+     * Alphanumeric filter used for validation; created lazily on the first
+     * isValid() call and shared by all instances
+     *
+     * @var Zend_Filter_Alnum
+     */
+    protected static $_filter = null;
+
+    /**
+     * Validation failure message template definitions
+     *
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::INVALID      => "Invalid type given. String, integer or float expected",
+        self::NOT_ALNUM    => "'%value%' contains characters which are non alphabetic and no digits",
+        self::STRING_EMPTY => "'%value%' is an empty string",
+    );
+
+    /**
+     * Sets default option values for this instance
+     *
+     * An array or Zend_Config is searched for the key 'allowWhiteSpace';
+     * when the key is missing the option is switched off.
+     *
+     * @param  boolean|array|Zend_Config $allowWhiteSpace
+     * @return void
+     */
+    public function __construct($allowWhiteSpace = false)
+    {
+        if ($allowWhiteSpace instanceof Zend_Config) {
+            $allowWhiteSpace = $allowWhiteSpace->toArray();
+        }
+
+        if (is_array($allowWhiteSpace)) {
+            if (array_key_exists('allowWhiteSpace', $allowWhiteSpace)) {
+                $allowWhiteSpace = $allowWhiteSpace['allowWhiteSpace'];
+            } else {
+                $allowWhiteSpace = false;
+            }
+        }
+
+        $this->allowWhiteSpace = (bool) $allowWhiteSpace;
+    }
+
+    /**
+     * Returns the allowWhiteSpace option
+     *
+     * @return boolean
+     */
+    public function getAllowWhiteSpace()
+    {
+        return $this->allowWhiteSpace;
+    }
+
+    /**
+     * Sets the allowWhiteSpace option
+     *
+     * @param  boolean $allowWhiteSpace
+     * @return Zend_Validate_Alnum Provides a fluent interface
+     */
+    public function setAllowWhiteSpace($allowWhiteSpace)
+    {
+        $this->allowWhiteSpace = (bool) $allowWhiteSpace;
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if $value contains only alphabetic and digit characters
+     *
+     * @param  string|integer|float $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        if (!is_string($value) && !is_int($value) && !is_float($value)) {
+            $this->_error(self::INVALID);
+            return false;
+        }
+
+        $this->_setValue($value);
+
+        if ('' === $value) {
+            $this->_error(self::STRING_EMPTY);
+            return false;
+        }
+
+        if (null === self::$_filter) {
+            /**
+             * @see Zend_Filter_Alnum
+             */
+            require_once 'Zend/Filter/Alnum.php';
+            self::$_filter = new Zend_Filter_Alnum();
+        }
+
+        // The filter instance is shared, so it is re-configured on every call.
+        self::$_filter->allowWhiteSpace = $this->allowWhiteSpace;
+
+        // Compare the textual form strictly. A loose comparison treats
+        // numeric-looking strings such as "+1", " 1" or "0.0" as equal to
+        // their filtered form and would let the stripped characters through.
+        if ((string) $value !== self::$_filter->filter($value)) {
+            $this->_error(self::NOT_ALNUM);
+            return false;
+        }
+
+        return true;
+    }
+
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/NotEmpty.php

@@ -0,0 +1,279 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: NotEmpty.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_NotEmpty extends Zend_Validate_Abstract
+{
+    const BOOLEAN       = 1;
+    const INTEGER       = 2;
+    const FLOAT         = 4;
+    const STRING        = 8;
+    const ZERO          = 16;
+    const EMPTY_ARRAY   = 32;
+    const NULL          = 64;
+    const PHP           = 127;
+    const SPACE         = 128;
+    const OBJECT        = 256;
+    const OBJECT_STRING = 512;
+    const OBJECT_COUNT  = 1024;
+    const ALL           = 2047;
+
+    const INVALID  = 'notEmptyInvalid';
+    const IS_EMPTY = 'isEmpty';
+
+    /**
+     * Maps every type flag (and the combined PHP / ALL masks) to the name
+     * accepted by setType()
+     *
+     * @var array
+     */
+    protected $_constants = array(
+        self::BOOLEAN       => 'boolean',
+        self::INTEGER       => 'integer',
+        self::FLOAT         => 'float',
+        self::STRING        => 'string',
+        self::ZERO          => 'zero',
+        self::EMPTY_ARRAY   => 'array',
+        self::NULL          => 'null',
+        self::PHP           => 'php',
+        self::SPACE         => 'space',
+        self::OBJECT        => 'object',
+        self::OBJECT_STRING => 'objectstring',
+        self::OBJECT_COUNT  => 'objectcount',
+        self::ALL           => 'all',
+    );
+
+    /**
+     * Validation failure message template definitions
+     *
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::IS_EMPTY => "Value is required and can't be empty",
+        self::INVALID  => "Invalid type given. String, integer, float, boolean or array expected",
+    );
+
+    /**
+     * Bit mask of the types treated as empty; the default 493 is
+     * OBJECT | SPACE | NULL | EMPTY_ARRAY | STRING | FLOAT | BOOLEAN
+     *
+     * @var integer
+     */
+    protected $_type = 493;
+
+    /**
+     * Constructor
+     *
+     * Accepts an options array or Zend_Config with the key 'type', or the
+     * type itself as the first positional argument.
+     *
+     * @param  string|integer|array|Zend_Config $options OPTIONAL
+     * @throws Zend_Validate_Exception When the given type is not recognised
+     */
+    public function __construct($options = null)
+    {
+        if ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        } else if (!is_array($options)) {
+            $arguments = func_get_args();
+            $options   = array();
+            if (!empty($arguments)) {
+                $options['type'] = array_shift($arguments);
+            }
+        }
+
+        if (is_array($options) && array_key_exists('type', $options)) {
+            $this->setType($options['type']);
+        }
+    }
+
+    /**
+     * Returns the set types
+     *
+     * @return integer Bit mask built from the type constants of this class
+     */
+    public function getType()
+    {
+        return $this->_type;
+    }
+
+    /**
+     * Set the types
+     *
+     * A list of flags and/or names is combined with a bitwise OR, so listing
+     * a flag twice or listing overlapping masks (e.g. 'php' plus 'integer')
+     * cannot spill over into an unrelated flag. Unknown names in a list are
+     * ignored.
+     *
+     * @param  integer|string|array $type
+     * @throws Zend_Validate_Exception When the result is no integer between 0 and ALL
+     * @return Zend_Validate_NotEmpty Provides a fluent interface
+     */
+    public function setType($type = null)
+    {
+        if (is_array($type)) {
+            $detected = 0;
+            foreach ($type as $value) {
+                if (is_int($value)) {
+                    $detected |= $value;
+                    continue;
+                }
+
+                $flag = array_search($value, $this->_constants, true);
+                if ($flag !== false) {
+                    $detected |= $flag;
+                }
+            }
+
+            $type = $detected;
+        } else if (is_string($type)) {
+            $flag = array_search($type, $this->_constants, true);
+            if ($flag !== false) {
+                $type = $flag;
+            }
+        }
+
+        if (!is_int($type) || ($type < 0) || ($type > self::ALL)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Unknown type');
+        }
+
+        $this->_type = $type;
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if $value is not an empty value, where
+     * "empty" is decided by the flags configured through setType().
+     *
+     * @param  mixed $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        if ($value !== null && !is_string($value) && !is_int($value) && !is_float($value) &&
+            !is_bool($value) && !is_array($value) && !is_object($value)) {
+            $this->_error(self::INVALID);
+            return false;
+        }
+
+        $type = $this->getType();
+        $this->_setValue($value);
+
+        // OBJECT_COUNT (countable object)
+        if (($type & self::OBJECT_COUNT) &&
+            is_object($value) && ($value instanceof Countable) && (count($value) == 0)) {
+            $this->_error(self::IS_EMPTY);
+            return false;
+        }
+
+        // OBJECT_STRING (object's toString)
+        if (($type & self::OBJECT_STRING) && is_object($value) &&
+            (!method_exists($value, '__toString') || (((string) $value) == ""))) {
+            $this->_error(self::IS_EMPTY);
+            return false;
+        }
+
+        // With no object flag configured at all, any object counts as empty;
+        // with OBJECT set, objects are always not empty.
+        $objectFlags = self::OBJECT | self::OBJECT_STRING | self::OBJECT_COUNT;
+        if (!($type & $objectFlags) && is_object($value)) {
+            $this->_error(self::IS_EMPTY);
+            return false;
+        }
+
+        // SPACE ('   ')
+        if (($type & self::SPACE) && is_string($value) && (preg_match('/^\s+$/s', $value))) {
+            $this->_error(self::IS_EMPTY);
+            return false;
+        }
+
+        // NULL (null)
+        if (($type & self::NULL) && ($value === null)) {
+            $this->_error(self::IS_EMPTY);
+            return false;
+        }
+
+        // EMPTY_ARRAY (array())
+        if (($type & self::EMPTY_ARRAY) && is_array($value) && ($value == array())) {
+            $this->_error(self::IS_EMPTY);
+            return false;
+        }
+
+        // ZERO ('0')
+        if (($type & self::ZERO) && is_string($value) && ($value == '0')) {
+            $this->_error(self::IS_EMPTY);
+            return false;
+        }
+
+        // STRING ('')
+        if (($type & self::STRING) && is_string($value) && ($value == '')) {
+            $this->_error(self::IS_EMPTY);
+            return false;
+        }
+
+        // FLOAT (0.0)
+        if (($type & self::FLOAT) && is_float($value) && ($value == 0.0)) {
+            $this->_error(self::IS_EMPTY);
+            return false;
+        }
+
+        // INTEGER (0)
+        if (($type & self::INTEGER) && is_int($value) && ($value == 0)) {
+            $this->_error(self::IS_EMPTY);
+            return false;
+        }
+
+        // BOOLEAN (false)
+        if (($type & self::BOOLEAN) && is_bool($value) && ($value == false)) {
+            $this->_error(self::IS_EMPTY);
+            return false;
+        }
+
+        return true;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/PostCode.php

@@ -0,0 +1,210 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: PostCode.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @see Zend_Locale_Format
+ */
+require_once 'Zend/Locale/Format.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_PostCode extends Zend_Validate_Abstract
+{
+    const INVALID  = 'postcodeInvalid';
+    const NO_MATCH = 'postcodeNoMatch';
+
+    /**
+     * Validation failure message template definitions
+     *
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::INVALID  => "Invalid type given. String or integer expected",
+        self::NO_MATCH => "'%value%' does not appear to be a postal code",
+    );
+
+    /**
+     * Locale to use, as returned by Zend_Locale::findLocale()
+     *
+     * @var string
+     */
+    protected $_locale;
+
+    /**
+     * Postal code format, stored as a delimited regular expression
+     *
+     * @var string
+     */
+    protected $_format;
+
+    /**
+     * Constructor for the postal code validator
+     *
+     * Accepts either a string locale, a Zend_Locale object, or an array or
+     * Zend_Config object containing the keys "locale" and/or "format". With
+     * no options, the registry entry 'Zend_Locale' is used when present.
+     *
+     * @param string|Zend_Locale|array|Zend_Config $options
+     * @throws Zend_Validate_Exception When no format is known after processing the options
+     */
+    public function __construct($options = null)
+    {
+        if ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        }
+
+        if (empty($options)) {
+            require_once 'Zend/Registry.php';
+            if (Zend_Registry::isRegistered('Zend_Locale')) {
+                $this->setLocale(Zend_Registry::get('Zend_Locale'));
+            }
+        } elseif (is_array($options)) {
+            // The locale is applied first so that an explicit format can override the detected one.
+            $setters = array('locale' => 'setLocale', 'format' => 'setFormat');
+            foreach ($setters as $key => $setter) {
+                if (array_key_exists($key, $options)) {
+                    $this->$setter($options[$key]);
+                }
+            }
+        } elseif ($options instanceof Zend_Locale || is_string($options)) {
+            // Received Locale object or string locale
+            $this->setLocale($options);
+        }
+
+        $knownFormat = $this->getFormat();
+        if (empty($knownFormat)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception("A postcode-format string has to be given for validation");
+        }
+    }
+
+    /**
+     * Returns the set locale
+     *
+     * @return string|Zend_Locale The set locale
+     */
+    public function getLocale()
+    {
+        return $this->_locale;
+    }
+
+    /**
+     * Sets the locale to use
+     *
+     * The postal code format registered for the locale's region is looked
+     * up and installed through setFormat().
+     *
+     * @param string|Zend_Locale $locale
+     * @throws Zend_Validate_Exception On unrecognised region
+     * @throws Zend_Validate_Exception On not detected format
+     * @return Zend_Validate_PostCode  Provides fluid interface
+     */
+    public function setLocale($locale = null)
+    {
+        require_once 'Zend/Locale.php';
+        $this->_locale = Zend_Locale::findLocale($locale);
+        $locale        = new Zend_Locale($this->_locale);
+
+        $region = $locale->getRegion();
+        if (empty($region)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception("Unable to detect a region for the locale '$locale'");
+        }
+
+        $detected = Zend_Locale::getTranslation($locale->getRegion(), 'postaltoterritory', $this->_locale);
+        if (empty($detected)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception("Unable to detect a postcode format for the region '{$locale->getRegion()}'");
+        }
+
+        $this->setFormat($detected);
+        return $this;
+    }
+
+    /**
+     * Returns the set postal code format
+     *
+     * @return string
+     */
+    public function getFormat()
+    {
+        return $this->_format;
+    }
+
+    /**
+     * Sets a self defined postal format as regex
+     *
+     * A format without a leading "/" gets "/^" prepended, and one without a
+     * trailing "/" gets "$/" appended.
+     *
+     * @param string $format
+     * @throws Zend_Validate_Exception On empty format
+     * @return Zend_Validate_PostCode  Provides fluid interface
+     */
+    public function setFormat($format)
+    {
+        if (!is_string($format) || empty($format)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception("A postcode-format string has to be given for validation");
+        }
+
+        if (substr($format, 0, 1) !== '/') {
+            $format = '/^' . $format;
+        }
+
+        if (substr($format, -1) !== '/') {
+            $format .= '$/';
+        }
+
+        $this->_format = $format;
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if $value is a valid postalcode
+     *
+     * @param  string|integer $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        $this->_setValue($value);
+
+        $hasSupportedType = is_string($value) || is_int($value);
+        if (!$hasSupportedType) {
+            $this->_error(self::INVALID);
+            return false;
+        }
+
+        if (!preg_match($this->getFormat(), $value)) {
+            $this->_error(self::NO_MATCH);
+            return false;
+        }
+
+        return true;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/LessThan.php

@@ -0,0 +1,122 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: LessThan.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_LessThan extends Zend_Validate_Abstract
+{
+    const NOT_LESS = 'notLessThan';
+
+    /**
+     * Validation failure message template definitions
+     *
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::NOT_LESS => "'%value%' is not less than '%max%'"
+    );
+
+    /**
+     * Additional variables available for validation failure messages
+     *
+     * @var array
+     */
+    protected $_messageVariables = array(
+        'max' => '_max'
+    );
+
+    /**
+     * Upper bound the validated value has to stay below
+     *
+     * @var mixed
+     */
+    protected $_max;
+
+    /**
+     * Sets validator options
+     *
+     * @param  mixed|Zend_Config $max The bound itself, or an array / Zend_Config holding it under 'max'
+     * @throws Zend_Validate_Exception When an array or Zend_Config lacks the 'max' key
+     * @return void
+     */
+    public function __construct($max)
+    {
+        if ($max instanceof Zend_Config) {
+            $max = $max->toArray();
+        }
+
+        if (is_array($max)) {
+            if (!array_key_exists('max', $max)) {
+                require_once 'Zend/Validate/Exception.php';
+                throw new Zend_Validate_Exception("Missing option 'max'");
+            }
+
+            $max = $max['max'];
+        }
+
+        $this->setMax($max);
+    }
+
+    /**
+     * Returns the max option
+     *
+     * @return mixed
+     */
+    public function getMax()
+    {
+        return $this->_max;
+    }
+
+    /**
+     * Sets the max option
+     *
+     * @param  mixed $max
+     * @return Zend_Validate_LessThan Provides a fluent interface
+     */
+    public function setMax($max)
+    {
+        $this->_max = $max;
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if $value is less than max option
+     *
+     * @param  mixed $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        $this->_setValue($value);
+
+        $reachesMax = ($this->_max <= $value);
+        if ($reachesMax) {
+            $this->_error(self::NOT_LESS);
+        }
+
+        return !$reachesMax;
+    }
+
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Alpha.php

@@ -0,0 +1,150 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Alpha.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Alpha extends Zend_Validate_Abstract
+{
+    const INVALID      = 'alphaInvalid';
+    const NOT_ALPHA    = 'notAlpha';
+    const STRING_EMPTY = 'alphaStringEmpty';
+
+    /**
+     * Whether to allow white space characters; off by default
+     *
+     * @var boolean
+     * @deprecated
+     */
+    public $allowWhiteSpace;
+
+    /**
+     * Alphabetic filter used for validation; created lazily on the first
+     * isValid() call and shared by all instances
+     *
+     * @var Zend_Filter_Alpha
+     */
+    protected static $_filter = null;
+
+    /**
+     * Validation failure message template definitions
+     *
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::INVALID      => "Invalid type given. String expected",
+        self::NOT_ALPHA    => "'%value%' contains non alphabetic characters",
+        self::STRING_EMPTY => "'%value%' is an empty string"
+    );
+
+    /**
+     * Sets default option values for this instance
+     *
+     * An array or Zend_Config is searched for the key 'allowWhiteSpace';
+     * when the key is missing the option is switched off.
+     *
+     * @param  boolean|array|Zend_Config $allowWhiteSpace
+     * @return void
+     */
+    public function __construct($allowWhiteSpace = false)
+    {
+        if ($allowWhiteSpace instanceof Zend_Config) {
+            $allowWhiteSpace = $allowWhiteSpace->toArray();
+        }
+
+        if (is_array($allowWhiteSpace)) {
+            $allowWhiteSpace = array_key_exists('allowWhiteSpace', $allowWhiteSpace)
+                ? $allowWhiteSpace['allowWhiteSpace']
+                : false;
+        }
+
+        $this->allowWhiteSpace = (bool) $allowWhiteSpace;
+    }
+
+    /**
+     * Returns the allowWhiteSpace option
+     *
+     * @return boolean
+     */
+    public function getAllowWhiteSpace()
+    {
+        return $this->allowWhiteSpace;
+    }
+
+    /**
+     * Sets the allowWhiteSpace option
+     *
+     * @param  boolean $allowWhiteSpace
+     * @return Zend_Validate_Alpha Provides a fluent interface
+     */
+    public function setAllowWhiteSpace($allowWhiteSpace)
+    {
+        $this->allowWhiteSpace = (bool) $allowWhiteSpace;
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if $value contains only alphabetic characters
+     *
+     * @param  string $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        if (!is_string($value)) {
+            $this->_error(self::INVALID);
+            return false;
+        }
+
+        $this->_setValue($value);
+
+        if ($value === '') {
+            $this->_error(self::STRING_EMPTY);
+            return false;
+        }
+
+        if (self::$_filter === null) {
+            /**
+             * @see Zend_Filter_Alpha
+             */
+            require_once 'Zend/Filter/Alpha.php';
+            self::$_filter = new Zend_Filter_Alpha();
+        }
+
+        // The filter instance is shared, so it is re-configured on every call.
+        self::$_filter->allowWhiteSpace = $this->allowWhiteSpace;
+
+        // The value is valid exactly when filtering removes nothing from it.
+        $filtered = self::$_filter->filter($value);
+        if ($filtered !== $value) {
+            $this->_error(self::NOT_ALPHA);
+            return false;
+        }
+
+        return true;
+    }
+
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Between.php

@@ -0,0 +1,224 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Between.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Between extends Zend_Validate_Abstract
+{
+    /**
+     * Message key used when the borders are allowed and the value lies outside them
+     */
+    const NOT_BETWEEN        = 'notBetween';
+
+    /**
+     * Message key used when the borders are excluded and the value is not inside them
+     */
+    const NOT_BETWEEN_STRICT = 'notBetweenStrict';
+
+    /**
+     * Failure message templates, indexed by message key
+     *
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::NOT_BETWEEN        => "'%value%' is not between '%min%' and '%max%', inclusively",
+        self::NOT_BETWEEN_STRICT => "'%value%' is not strictly between '%min%' and '%max%'"
+    );
+
+    /**
+     * Maps message placeholders to the properties that supply their values
+     *
+     * @var array
+     */
+    protected $_messageVariables = array(
+        'min' => '_min',
+        'max' => '_max'
+    );
+
+    /**
+     * Lower border
+     *
+     * @var mixed
+     */
+    protected $_min;
+
+    /**
+     * Upper border
+     *
+     * @var mixed
+     */
+    protected $_max;
+
+    /**
+     * Comparison mode
+     *
+     * When truthy, a value equal to min or max is accepted; otherwise the
+     * value has to be greater than min and less than max.
+     *
+     * @var boolean
+     */
+    protected $_inclusive;
+
+    /**
+     * Sets validator options
+     *
+     * Options may be given as an array or Zend_Config with these keys:
+     *   'min' => scalar, minimum border
+     *   'max' => scalar, maximum border
+     *   'inclusive' => boolean, inclusive border values (true when omitted)
+     * Any other kind of first argument switches to positional mode, where the
+     * arguments are read in the order min, max, inclusive.
+     *
+     * @param  array|Zend_Config $options
+     * @throws Zend_Validate_Exception When 'min' or 'max' is not supplied
+     * @return void
+     */
+    public function __construct($options)
+    {
+        if ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        } elseif (!is_array($options)) {
+            // Positional call style: translate the argument list into named options.
+            $arguments = func_get_args();
+            $named     = array('min' => array_shift($arguments));
+
+            if (!empty($arguments)) {
+                $named['max'] = array_shift($arguments);
+            }
+
+            if (!empty($arguments)) {
+                $named['inclusive'] = array_shift($arguments);
+            }
+
+            $options = $named;
+        }
+
+        $hasBothBorders = array_key_exists('min', $options) && array_key_exists('max', $options);
+        if (!$hasBothBorders) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception("Missing option. 'min' and 'max' has to be given");
+        }
+
+        $inclusive = array_key_exists('inclusive', $options) ? $options['inclusive'] : true;
+
+        $this->setMin($options['min'])
+             ->setMax($options['max'])
+             ->setInclusive($inclusive);
+    }
+
+    /**
+     * Returns the lower border
+     *
+     * @return mixed
+     */
+    public function getMin()
+    {
+        return $this->_min;
+    }
+
+    /**
+     * Stores the lower border
+     *
+     * @param  mixed $min
+     * @return Zend_Validate_Between Provides a fluent interface
+     */
+    public function setMin($min)
+    {
+        $this->_min = $min;
+
+        return $this;
+    }
+
+    /**
+     * Returns the upper border
+     *
+     * @return mixed
+     */
+    public function getMax()
+    {
+        return $this->_max;
+    }
+
+    /**
+     * Stores the upper border
+     *
+     * @param  mixed $max
+     * @return Zend_Validate_Between Provides a fluent interface
+     */
+    public function setMax($max)
+    {
+        $this->_max = $max;
+
+        return $this;
+    }
+
+    /**
+     * Returns the comparison mode
+     *
+     * @return boolean
+     */
+    public function getInclusive()
+    {
+        return $this->_inclusive;
+    }
+
+    /**
+     * Stores the comparison mode (the value is kept as given, without a cast)
+     *
+     * @param  boolean $inclusive
+     * @return Zend_Validate_Between Provides a fluent interface
+     */
+    public function setInclusive($inclusive)
+    {
+        $this->_inclusive = $inclusive;
+
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if $value lies between the min and max
+     * options; the borders themselves count as valid only in inclusive mode.
+     *
+     * @param  mixed $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        $this->_setValue($value);
+
+        if ($this->_inclusive) {
+            $outOfRange = ($this->_min > $value) || ($value > $this->_max);
+            $messageKey = self::NOT_BETWEEN;
+        } else {
+            $outOfRange = ($this->_min >= $value) || ($value >= $this->_max);
+            $messageKey = self::NOT_BETWEEN_STRICT;
+        }
+
+        if ($outOfRange) {
+            $this->_error($messageKey);
+            return false;
+        }
+
+        return true;
+    }
+
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Int.php

@@ -0,0 +1,148 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Int.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @see Zend_Locale_Format
+ */
+require_once 'Zend/Locale/Format.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Int extends Zend_Validate_Abstract
+{
+    const INVALID = 'intInvalid';
+    const NOT_INT = 'notInt';
+
+    /**
+     * Failure message templates, indexed by message key
+     *
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::INVALID => "Invalid type given. String or integer expected",
+        self::NOT_INT => "'%value%' does not appear to be an integer",
+    );
+
+    /**
+     * Locale assigned through setLocale(); stays null until one is set
+     */
+    protected $_locale;
+
+    /**
+     * Constructor for the integer validator
+     *
+     * The locale may be passed directly, or under the 'locale' key of an
+     * array or Zend_Config. When nothing usable is given, a 'Zend_Locale'
+     * entry in Zend_Registry is used if one is registered.
+     *
+     * @param string|Zend_Config|Zend_Locale $locale
+     */
+    public function __construct($locale = null)
+    {
+        if ($locale instanceof Zend_Config) {
+            $locale = $locale->toArray();
+        }
+
+        if (is_array($locale)) {
+            $locale = array_key_exists('locale', $locale) ? $locale['locale'] : null;
+        }
+
+        if (empty($locale)) {
+            require_once 'Zend/Registry.php';
+            if (Zend_Registry::isRegistered('Zend_Locale')) {
+                $locale = Zend_Registry::get('Zend_Locale');
+            }
+        }
+
+        if (null === $locale) {
+            return;
+        }
+
+        $this->setLocale($locale);
+    }
+
+    /**
+     * Returns the locale stored by setLocale(), or null when none was set
+     */
+    public function getLocale()
+    {
+        return $this->_locale;
+    }
+
+    /**
+     * Resolves the given locale through Zend_Locale::findLocale() and stores the result
+     *
+     * @param string|Zend_Locale $locale
+     */
+    public function setLocale($locale = null)
+    {
+        require_once 'Zend/Locale.php';
+        $resolved      = Zend_Locale::findLocale($locale);
+        $this->_locale = $resolved;
+
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if $value is a valid integer. Native integers
+     * pass immediately; strings and floats are checked either against the
+     * configured locale or, without one, against the localeconv() settings.
+     *
+     * @param  string|integer $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        $acceptedType = is_string($value) || is_int($value) || is_float($value);
+        if (!$acceptedType) {
+            $this->_error(self::INVALID);
+            return false;
+        }
+
+        if (is_int($value)) {
+            return true;
+        }
+
+        $this->_setValue($value);
+
+        if ($this->_locale !== null) {
+            try {
+                $formatOptions = array('locale' => $this->_locale);
+                if (!Zend_Locale_Format::isInteger($value, $formatOptions)) {
+                    $this->_error(self::NOT_INT);
+                    return false;
+                }
+            } catch (Zend_Locale_Exception $e) {
+                $this->_error(self::NOT_INT);
+                return false;
+            }
+
+            return true;
+        }
+
+        // No locale configured: normalise the separators reported by localeconv()
+        // and compare the value with its integer round trip.
+        $conventions = localeconv();
+        $normalized  = str_replace(
+            $conventions['thousands_sep'],
+            '',
+            str_replace($conventions['decimal_point'], '.', $value)
+        );
+
+        if (strval(intval($normalized)) != $normalized) {
+            $this->_error(self::NOT_INT);
+            return false;
+        }
+
+        return true;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Isbn.php

@@ -0,0 +1,279 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Isbn.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Isbn extends Zend_Validate_Abstract
+{
+    const AUTO    = 'auto';
+    const ISBN10  = '10';
+    const ISBN13  = '13';
+    const INVALID = 'isbnInvalid';
+    const NO_ISBN = 'isbnNoIsbn';
+
+    /**
+     * Validation failure message template definitions.
+     *
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::INVALID => "Invalid type given. String or integer expected",
+        self::NO_ISBN => "'%value%' is no valid ISBN number",
+    );
+
+    /**
+     * Allowed type: one of self::AUTO, self::ISBN10 or self::ISBN13.
+     *
+     * @var string
+     */
+    protected $_type = self::AUTO;
+
+    /**
+     * Separator character expected between the ISBN groups ('' for none).
+     *
+     * @var string
+     */
+    protected $_separator = '';
+
+    /**
+     * Set up options.
+     *
+     * Recognised keys are 'type' (see setType()) and 'separator'
+     * (see setSeparator()); both are optional.
+     *
+     * @param  Zend_Config|array $options
+     * @throws Zend_Validate_Exception When $options is not valid
+     * @return void
+     */
+    public function __construct($options = array())
+    {
+        // prepare options
+        if ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        }
+        if (!is_array($options)) {
+            /**
+             * @see Zend_Validate_Exception
+             */
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Invalid options provided.');
+        }
+
+        // set type
+        if (array_key_exists('type', $options)) {
+            $this->setType($options['type']);
+        }
+
+        // set separator
+        if (array_key_exists('separator', $options)) {
+            $this->setSeparator($options['separator']);
+        }
+    }
+
+    /**
+     * Detect input format.
+     *
+     * Builds one pattern per allowed type (with or without separators) and
+     * returns the type of the first pattern whose expected length and
+     * layout match the current value.
+     *
+     * @return string|null self::ISBN10 or self::ISBN13, or null when nothing matches
+     */
+    protected function _detectFormat()
+    {
+        // prepare separator and pattern list
+        $sep      = quotemeta($this->_separator);
+        $patterns = array();
+        $lengths  = array();
+
+        // check for ISBN-10
+        if ($this->_type == self::ISBN10 || $this->_type == self::AUTO) {
+            if (empty($sep)) {
+                $pattern = '/^[0-9]{9}[0-9X]{1}$/';
+                $length  = 10;
+            } else {
+                // 10 characters plus 3 separators
+                $pattern = "/^[0-9]{1,7}[{$sep}]{1}[0-9]{1,7}[{$sep}]{1}[0-9]{1,7}[{$sep}]{1}[0-9X]{1}$/";
+                $length  = 13;
+            }
+
+            $patterns[$pattern] = self::ISBN10;
+            $lengths[$pattern]  = $length;
+        }
+
+        // check for ISBN-13
+        if ($this->_type == self::ISBN13 || $this->_type == self::AUTO) {
+            if (empty($sep)) {
+                $pattern = '/^[0-9]{13}$/';
+                $length  = 13;
+            } else {
+                // 13 digits plus 4 separators
+                $pattern = "/^[0-9]{1,9}[{$sep}]{1}[0-9]{1,5}[{$sep}]{1}[0-9]{1,9}[{$sep}]{1}[0-9]{1,9}[{$sep}]{1}[0-9]{1}$/";
+                $length  = 17;
+            }
+
+            $patterns[$pattern] = self::ISBN13;
+            $lengths[$pattern]  = $length;
+        }
+
+        // check pattern list
+        foreach ($patterns as $pattern => $type) {
+            if ((strlen($this->_value) == $lengths[$pattern]) && preg_match($pattern, $this->_value)) {
+                return $type;
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface.
+     *
+     * Returns true if and only if $value is a valid ISBN: its layout must
+     * match an allowed format and its last character must equal the check
+     * character computed from the preceding digits.
+     *
+     * @param  string|integer $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        if (!is_string($value) && !is_int($value)) {
+            $this->_error(self::INVALID);
+            return false;
+        }
+
+        $value = (string) $value;
+        $this->_setValue($value);
+
+        switch ($this->_detectFormat()) {
+            case self::ISBN10:
+                // weighted sum of the first nine digits (weights 10 down to 2)
+                $isbn10 = str_replace($this->_separator, '', $value);
+                $sum    = 0;
+                for ($i = 0; $i < 9; $i++) {
+                    // Square brackets: the curly-brace offset syntax is a
+                    // parse error on PHP 8.
+                    $sum += (10 - $i) * (int) $isbn10[$i];
+                }
+
+                // checksum
+                $checksum = 11 - ($sum % 11);
+                if ($checksum == 11) {
+                    $checksum = '0';
+                } elseif ($checksum == 10) {
+                    $checksum = 'X';
+                }
+                break;
+
+            case self::ISBN13:
+                // sum of the first twelve digits with alternating weights 1 and 3
+                $isbn13 = str_replace($this->_separator, '', $value);
+                $sum    = 0;
+                for ($i = 0; $i < 12; $i++) {
+                    if ($i % 2 == 0) {
+                        $sum += (int) $isbn13[$i];
+                    } else {
+                        $sum += 3 * (int) $isbn13[$i];
+                    }
+                }
+                // checksum
+                $checksum = 10 - ($sum % 10);
+                if ($checksum == 10) {
+                    $checksum = '0';
+                }
+                break;
+
+            default:
+                $this->_error(self::NO_ISBN);
+                return false;
+        }
+
+        // validate: compare as strings so that no numeric juggling is involved
+        if (substr($this->_value, -1) !== (string) $checksum) {
+            $this->_error(self::NO_ISBN);
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Set separator characters.
+     *
+     * Only the empty string, a hyphen and a space are allowed.
+     *
+     * @param  string $separator
+     * @throws Zend_Validate_Exception When $separator is not valid
+     * @return Zend_Validate_Isbn Provides a fluent interface
+     */
+    public function setSeparator($separator)
+    {
+        // check separator
+        if (!in_array($separator, array('-', ' ', ''))) {
+            /**
+             * @see Zend_Validate_Exception
+             */
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Invalid ISBN separator.');
+        }
+
+        $this->_separator = $separator;
+        return $this;
+    }
+
+    /**
+     * Get separator characters.
+     *
+     * @return string
+     */
+    public function getSeparator()
+    {
+        return $this->_separator;
+    }
+
+    /**
+     * Set allowed ISBN type.
+     *
+     * @param  string $type One of self::AUTO, self::ISBN10 or self::ISBN13
+     * @throws Zend_Validate_Exception When $type is not valid
+     * @return Zend_Validate_Isbn Provides a fluent interface
+     */
+    public function setType($type)
+    {
+        // check type
+        if (!in_array($type, array(self::AUTO, self::ISBN10, self::ISBN13))) {
+            /**
+             * @see Zend_Validate_Exception
+             */
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Invalid ISBN type');
+        }
+
+        $this->_type = $type;
+        return $this;
+    }
+
+    /**
+     * Get allowed ISBN type.
+     *
+     * @return string
+     */
+    public function getType()
+    {
+        return $this->_type;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Exception.php

@@ -0,0 +1,34 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Exception.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Exception
+ */
+require_once 'Zend/Exception.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Exception extends Zend_Exception
+{
+    // Intentionally empty: the class only gives validator errors their own type.
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Db/NoRecordExists.php

@@ -0,0 +1,51 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: NoRecordExists.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Db_Abstract
+ */
+require_once 'Zend/Validate/Db/Abstract.php';
+
+/**
+ * Confirms a record does not exist in a table.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @uses       Zend_Validate_Db_Abstract
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Db_NoRecordExists extends Zend_Validate_Db_Abstract
+{
+    /**
+     * Returns true when the query for $value yields no (truthy) result;
+     * otherwise records ERROR_RECORD_FOUND and returns false.
+     *
+     * @param  mixed $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        $this->_setValue($value);
+
+        $match = $this->_query($value);
+        if (!$match) {
+            return true;
+        }
+
+        $this->_error(self::ERROR_RECORD_FOUND);
+        return false;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Db/RecordExists.php

@@ -0,0 +1,51 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: RecordExists.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Db_Abstract
+ */
+require_once 'Zend/Validate/Db/Abstract.php';
+
+/**
+ * Confirms a record exists in a table.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @uses       Zend_Validate_Db_Abstract
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Db_RecordExists extends Zend_Validate_Db_Abstract
+{
+    /**
+     * Returns true when the query for $value yields a (truthy) result;
+     * otherwise records ERROR_NO_RECORD_FOUND and returns false.
+     *
+     * @param  mixed $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        $this->_setValue($value);
+
+        $match = $this->_query($value);
+        if ($match) {
+            return true;
+        }
+
+        $this->_error(self::ERROR_NO_RECORD_FOUND);
+        return false;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Db/Abstract.php

@@ -0,0 +1,351 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Abstract.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * Class for Database record validation
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @uses       Zend_Validate_Abstract
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+abstract class Zend_Validate_Db_Abstract extends Zend_Validate_Abstract
+{
+    /**
+     * Error constants
+     */
+    const ERROR_NO_RECORD_FOUND = 'noRecordFound';
+    const ERROR_RECORD_FOUND    = 'recordFound';
+
+    /**
+     * @var array Message templates
+     */
+    protected $_messageTemplates = array(
+        self::ERROR_NO_RECORD_FOUND => "No record matching '%value%' was found",
+        self::ERROR_RECORD_FOUND    => "A record matching '%value%' was found",
+    );
+
+    /**
+     * @var string|null
+     */
+    protected $_schema = null;
+
+    /**
+     * @var string
+     */
+    protected $_table = '';
+
+    /**
+     * @var string
+     */
+    protected $_field = '';
+
+    /**
+     * @var mixed
+     */
+    protected $_exclude = null;
+
+    /**
+     * Database adapter to use. If null, getAdapter() falls back to
+     * Zend_Db_Table_Abstract::getDefaultAdapter()
+     *
+     * @var Zend_Db_Adapter_Abstract|null
+     */
+    protected $_adapter = null;
+
+    /**
+     * Select object to use. can be set, or will be auto-generated
+     * @var Zend_Db_Select
+     */
+    protected $_select;
+
+    /**
+     * Provides basic configuration for use with Zend_Validate_Db Validators
+     * Setting $exclude allows a single record to be excluded from matching.
+     * Exclude can either be a String containing a where clause, or an array with `field` and `value` keys
+     * to define the where clause added to the sql.
+     * A database adapter may optionally be supplied to avoid using the registered default adapter.
+     *
+     * The following option keys are supported:
+     * 'table'   => The database table to validate against
+     * 'schema'  => The schema keys
+     * 'field'   => The field to check for a match
+     * 'exclude' => An optional where clause or field/value pair to exclude from the query
+     * 'adapter' => An optional database adapter to use
+     *
+     * A Zend_Db_Select may be passed instead of options; it is then used
+     * as-is and no other option is read. When more than one argument is
+     * given they are read positionally as table, field, exclude, adapter.
+     *
+     * @param array|Zend_Config|Zend_Db_Select $options Options to use for this validator
+     * @throws Zend_Validate_Exception When the table/schema or field option is missing
+     */
+    public function __construct($options)
+    {
+        if ($options instanceof Zend_Db_Select) {
+            $this->setSelect($options);
+            return;
+        }
+        if ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        } else if (func_num_args() > 1) {
+            $options       = func_get_args();
+            $temp          = array();
+            $temp['table'] = array_shift($options);
+            $temp['field'] = array_shift($options);
+            if (!empty($options)) {
+                $temp['exclude'] = array_shift($options);
+            }
+
+            if (!empty($options)) {
+                $temp['adapter'] = array_shift($options);
+            }
+
+            $options = $temp;
+        }
+
+        // A single non-array argument cannot carry any option; report it with
+        // the validator's own exception instead of letting array_key_exists()
+        // fail on a non-array.
+        if (!is_array($options)
+            || (!array_key_exists('table', $options) && !array_key_exists('schema', $options))
+        ) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Table or Schema option missing!');
+        }
+
+        if (!array_key_exists('field', $options)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Field option missing!');
+        }
+
+        if (array_key_exists('adapter', $options)) {
+            $this->setAdapter($options['adapter']);
+        }
+
+        if (array_key_exists('exclude', $options)) {
+            $this->setExclude($options['exclude']);
+        }
+
+        $this->setField($options['field']);
+        if (array_key_exists('table', $options)) {
+            $this->setTable($options['table']);
+        }
+
+        if (array_key_exists('schema', $options)) {
+            $this->setSchema($options['schema']);
+        }
+    }
+
+    /**
+     * Returns the set adapter
+     *
+     * When no adapter was set, the default adapter of Zend_Db_Table_Abstract
+     * is fetched and remembered for later calls.
+     *
+     * @throws Zend_Validate_Exception When no adapter is set and no default adapter exists
+     * @return Zend_Db_Adapter
+     */
+    public function getAdapter()
+    {
+        /**
+         * Check for an adapter being defined. if not, fetch the default adapter.
+         */
+        if ($this->_adapter === null) {
+            $this->_adapter = Zend_Db_Table_Abstract::getDefaultAdapter();
+            if (null === $this->_adapter) {
+                require_once 'Zend/Validate/Exception.php';
+                throw new Zend_Validate_Exception('No database adapter present');
+            }
+        }
+        return $this->_adapter;
+    }
+
+    /**
+     * Sets a new database adapter
+     *
+     * @param  Zend_Db_Adapter_Abstract $adapter
+     * @throws Zend_Validate_Exception When $adapter is not a Zend_Db_Adapter_Abstract
+     * @return Zend_Validate_Db_Abstract
+     */
+    public function setAdapter($adapter)
+    {
+        if (!($adapter instanceof Zend_Db_Adapter_Abstract)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Adapter option must be a database adapter!');
+        }
+
+        $this->_adapter = $adapter;
+        return $this;
+    }
+
+    /**
+     * Returns the set exclude clause
+     *
+     * @return string|array
+     */
+    public function getExclude()
+    {
+        return $this->_exclude;
+    }
+
+    /**
+     * Sets a new exclude clause
+     *
+     * @param string|array $exclude
+     * @return Zend_Validate_Db_Abstract
+     */
+    public function setExclude($exclude)
+    {
+        $this->_exclude = $exclude;
+        return $this;
+    }
+
+    /**
+     * Returns the set field
+     *
+     * @return string
+     */
+    public function getField()
+    {
+        return $this->_field;
+    }
+
+    /**
+     * Sets a new field (the value is cast to string)
+     *
+     * @param string $field
+     * @return Zend_Validate_Db_Abstract
+     */
+    public function setField($field)
+    {
+        $this->_field = (string) $field;
+        return $this;
+    }
+
+    /**
+     * Returns the set table
+     *
+     * @return string
+     */
+    public function getTable()
+    {
+        return $this->_table;
+    }
+
+    /**
+     * Sets a new table (the value is cast to string)
+     *
+     * @param string $table
+     * @return Zend_Validate_Db_Abstract
+     */
+    public function setTable($table)
+    {
+        $this->_table = (string) $table;
+        return $this;
+    }
+
+    /**
+     * Returns the set schema
+     *
+     * @return string|null
+     */
+    public function getSchema()
+    {
+        return $this->_schema;
+    }
+
+    /**
+     * Sets a new schema
+     *
+     * @param string $schema
+     * @return Zend_Validate_Db_Abstract
+     */
+    public function setSchema($schema)
+    {
+        $this->_schema = $schema;
+        return $this;
+    }
+
+    /**
+     * Sets the select object to be used by the validator
+     *
+     * @param Zend_Db_Select $select
+     * @throws Zend_Validate_Exception When $select is not a Zend_Db_Select
+     * @return Zend_Validate_Db_Abstract
+     */
+    public function setSelect($select)
+    {
+        if (!$select instanceof Zend_Db_Select) {
+            // Load the exception class first, as every other throw site in
+            // this class does.
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception('Select option must be a valid ' .
+                                              'Zend_Db_Select object');
+        }
+        $this->_select = $select;
+        return $this;
+    }
+
+    /**
+     * Gets the select object to be used by the validator.
+     * If no select object was supplied to the constructor,
+     * then it will auto-generate one from the given table,
+     * schema, field, and adapter options.
+     *
+     * The generated object is stored, so later calls return the same
+     * instance.
+     *
+     * @return Zend_Db_Select The Select object which will be used
+     */
+    public function getSelect()
+    {
+        if (null === $this->_select) {
+            $db = $this->getAdapter();
+            /**
+             * Build select object
+             */
+            $select = new Zend_Db_Select($db);
+            $select->from($this->_table, array($this->_field), $this->_schema);
+            if ($db->supportsParameters('named')) {
+                $select->where($db->quoteIdentifier($this->_field, true).' = :value'); // named
+            } else {
+                $select->where($db->quoteIdentifier($this->_field, true).' = ?'); // positional
+            }
+            if ($this->_exclude !== null) {
+                if (is_array($this->_exclude)) {
+                    // field/value pair: quoted identifier plus a bound value
+                    $select->where(
+                          $db->quoteIdentifier($this->_exclude['field'], true) .
+                            ' != ?', $this->_exclude['value']
+                    );
+                } else {
+                    // anything else is handed to where() unchanged
+                    $select->where($this->_exclude);
+                }
+            }
+            $select->limit(1);
+            $this->_select = $select;
+        }
+        return $this->_select;
+    }
+
+    /**
+     * Runs the select from getSelect() through its adapter's fetchRow(),
+     * binding $value under the 'value' key, and returns the fetchRow() result.
+     *
+     * @param  String $value
+     * @return mixed The fetchRow() result; the concrete validators in this
+     *               package only test it for truthiness
+     */
+    protected function _query($value)
+    {
+        $select = $this->getSelect();
+        /**
+         * Run query
+         */
+        $result = $select->getAdapter()->fetchRow(
+            $select,
+            array('value' => $value), // this should work whether db supports positional or named params
+            Zend_Db::FETCH_ASSOC
+            );
+
+        return $result;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Barcode.php

@@ -0,0 +1,228 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Barcode.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @see Zend_Loader
+ */
+require_once 'Zend/Loader.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Barcode extends Zend_Validate_Abstract
+{
+    const INVALID        = 'barcodeInvalid';
+    const FAILED         = 'barcodeFailed';
+    const INVALID_CHARS  = 'barcodeInvalidChars';
+    const INVALID_LENGTH = 'barcodeInvalidLength';
+
+    /**
+     * Failure message templates, keyed by the failure constants above
+     *
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::FAILED         => "'%value%' failed checksum validation",
+        self::INVALID_CHARS  => "'%value%' contains invalid characters",
+        self::INVALID_LENGTH => "'%value%' should have a length of %length% characters",
+        self::INVALID        => "Invalid type given. String expected",
+    );
+
+    /**
+     * Maps the %length% message placeholder to the $_length property
+     *
+     * @var array
+     */
+    protected $_messageVariables = array(
+        'length' => '_length'
+    );
+
+    /**
+     * Length reported by the adapter during the last isValid() call;
+     * an array of lengths is flattened to a "a/b/c" string when the
+     * length check fails
+     *
+     * @var integer|array|string
+     */
+    protected $_length;
+
+    /**
+     * Barcode adapter performing the actual checks
+     *
+     * @var Zend_Validate_Barcode_AdapterInterface
+     */
+    protected $_adapter;
+
+    /**
+     * Creates the validator for the given adapter
+     *
+     * An array (or Zend_Config) must contain the key 'adapter' and may
+     * contain 'options' (handed to the adapter constructor) and
+     * 'checksum' (handed to setChecksum()).
+     *
+     * @param  string|array|Zend_Config $adapter Adapter name or option set
+     * @throws Zend_Validate_Exception When an option set lacks 'adapter'
+     */
+    public function __construct($adapter)
+    {
+        if ($adapter instanceof Zend_Config) {
+            $adapter = $adapter->toArray();
+        }
+
+        $options  = null;
+        $checksum = null;
+        if (is_array($adapter)) {
+            $config = $adapter;
+            if (!array_key_exists('adapter', $config)) {
+                require_once 'Zend/Validate/Exception.php';
+                throw new Zend_Validate_Exception("Missing option 'adapter'");
+            }
+
+            $options  = array_key_exists('options', $config) ? $config['options'] : null;
+            $checksum = array_key_exists('checksum', $config) ? $config['checksum'] : null;
+            $adapter  = $config['adapter'];
+        }
+
+        $this->setAdapter($adapter, $options);
+        if (null !== $checksum) {
+            $this->setChecksum($checksum);
+        }
+    }
+
+    /**
+     * Returns the adapter currently in use
+     *
+     * @return Zend_Validate_Barcode_AdapterInterface
+     */
+    public function getAdapter()
+    {
+        return $this->_adapter;
+    }
+
+    /**
+     * Instantiates and stores a new barcode adapter
+     *
+     * The name is normalised to "Xxxx" casing. When a matching file exists
+     * under Zend/Validate/Barcode/ the bundled class is used, otherwise the
+     * normalised name itself is treated as the class name.
+     *
+     * @param  string $adapter Adapter name
+     * @param  array  $options Options passed to the adapter constructor
+     * @return Zend_Validate_Barcode Provides a fluent interface
+     * @throws Zend_Validate_Exception When the class is not an adapter
+     */
+    public function setAdapter($adapter, $options = null)
+    {
+        $name = ucfirst(strtolower($adapter));
+        require_once 'Zend/Loader.php';
+
+        $class = Zend_Loader::isReadable('Zend/Validate/Barcode/' . $name . '.php')
+               ? 'Zend_Validate_Barcode_' . $name
+               : $name;
+
+        if (!class_exists($class)) {
+            Zend_Loader::loadClass($class);
+        }
+
+        // The instance is stored before the interface check, as before.
+        $this->_adapter = new $class($options);
+        if (!($this->_adapter instanceof Zend_Validate_Barcode_AdapterInterface)) {
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception(
+                "Adapter " . $class . " does not implement Zend_Validate_Barcode_AdapterInterface"
+            );
+        }
+
+        return $this;
+    }
+
+    /**
+     * Returns the adapter's checksum flag
+     *
+     * @return boolean
+     */
+    public function getChecksum()
+    {
+        return $this->getAdapter()->getCheck();
+    }
+
+    /**
+     * Forwards the checksum flag to the adapter
+     *
+     * @param  boolean $checksum
+     * @return Zend_Validate_Barcode Provides a fluent interface
+     */
+    public function setChecksum($checksum)
+    {
+        $this->getAdapter()->setCheck($checksum);
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Checks, in this order, the value's type, its length, its characters
+     * and (when the checksum flag is on) its checksum; the first failing
+     * step records its error and ends validation.
+     *
+     * @param  string $value
+     * @return boolean True if and only if every enabled check passes
+     */
+    public function isValid($value)
+    {
+        if (!is_string($value)) {
+            $this->_error(self::INVALID);
+            return false;
+        }
+
+        $this->_setValue($value);
+        $adapter       = $this->getAdapter();
+        $this->_length = $adapter->getLength();
+
+        if (!$adapter->checkLength($value)) {
+            // Several allowed lengths are rendered as "a/b/c" for the message.
+            if (is_array($this->_length)) {
+                $joined = '';
+                foreach ($this->_length as $allowed) {
+                    $joined .= '/' . $allowed;
+                }
+
+                $this->_length = substr($joined, 1);
+            }
+
+            $this->_error(self::INVALID_LENGTH);
+            return false;
+        }
+
+        if (!$adapter->checkChars($value)) {
+            $this->_error(self::INVALID_CHARS);
+            return false;
+        }
+
+        // The flag is read after checkLength() because adapters may change it there.
+        if ($this->getChecksum() && !$adapter->checksum($value)) {
+            $this->_error(self::FAILED);
+            return false;
+        }
+
+        return true;
+    }
+}
\ No newline at end of file

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/GreaterThan.php

@@ -0,0 +1,124 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: GreaterThan.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_GreaterThan extends Zend_Validate_Abstract
+{
+
+    const NOT_GREATER = 'notGreaterThan';
+
+    /**
+     * Failure message templates
+     *
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::NOT_GREATER => "'%value%' is not greater than '%min%'",
+    );
+
+    /**
+     * Maps the %min% message placeholder to the $_min property
+     *
+     * @var array
+     */
+    protected $_messageVariables = array(
+        'min' => '_min'
+    );
+
+    /**
+     * Exclusive lower bound
+     *
+     * @var mixed
+     */
+    protected $_min;
+
+    /**
+     * Sets validator options
+     *
+     * An array (or Zend_Config) must carry the bound under the key 'min';
+     * any other argument is used as the bound itself.
+     *
+     * @param  mixed|Zend_Config $min
+     * @throws Zend_Validate_Exception When an option set lacks 'min'
+     */
+    public function __construct($min)
+    {
+        if ($min instanceof Zend_Config) {
+            $min = $min->toArray();
+        }
+
+        if (is_array($min)) {
+            if (!array_key_exists('min', $min)) {
+                require_once 'Zend/Validate/Exception.php';
+                throw new Zend_Validate_Exception("Missing option 'min'");
+            }
+
+            $min = $min['min'];
+        }
+
+        $this->setMin($min);
+    }
+
+    /**
+     * Returns the min option
+     *
+     * @return mixed
+     */
+    public function getMin()
+    {
+        return $this->_min;
+    }
+
+    /**
+     * Sets the min option
+     *
+     * @param  mixed $min
+     * @return Zend_Validate_GreaterThan Provides a fluent interface
+     */
+    public function setMin($min)
+    {
+        $this->_min = $min;
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if $value is greater than min option
+     *
+     * @param  mixed $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        $this->_setValue($value);
+
+        // Keep the "min >= value" form: it is not interchangeable with
+        // "value > min" for operands PHP cannot order.
+        $notGreater = ($this->_min >= $value);
+        if ($notGreater) {
+            $this->_error(self::NOT_GREATER);
+        }
+
+        return !$notGreater;
+    }
+
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Ccnum.php

@@ -0,0 +1,112 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Ccnum.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Ccnum extends Zend_Validate_Abstract
+{
+    /**
+     * Failure key: the filtered value does not have 13 to 19 digits
+     */
+    const LENGTH   = 'ccnumLength';
+
+    /**
+     * Failure key: the filtered value fails the mod-10 checksum
+     */
+    const CHECKSUM = 'ccnumChecksum';
+
+    /**
+     * Digits filter shared by all instances, created on first use
+     *
+     * @var Zend_Filter_Digits
+     */
+    protected static $_filter = null;
+
+    /**
+     * Validation failure message template definitions
+     *
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::LENGTH   => "'%value%' must contain between 13 and 19 digits",
+        self::CHECKSUM => "Luhn algorithm (mod-10 checksum) failed on '%value%'"
+    );
+
+    /**
+     * Raises a notice that this validator is deprecated
+     */
+    public function __construct()
+    {
+        trigger_error('Using the Ccnum validator is deprecated in favor of the CreditCard validator');
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if $value, reduced to its digits, has
+     * 13 to 19 digits and satisfies the Luhn algorithm (mod-10 checksum)
+     *
+     * @param  string $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        $this->_setValue($value);
+
+        if (null === self::$_filter) {
+            /**
+             * @see Zend_Filter_Digits
+             */
+            require_once 'Zend/Filter/Digits.php';
+            self::$_filter = new Zend_Filter_Digits();
+        }
+
+        $digits = self::$_filter->filter($value);
+        $count  = strlen($digits);
+
+        if ($count < 13 || $count > 19) {
+            $this->_error(self::LENGTH);
+            return false;
+        }
+
+        // Walk from the digit left of the check digit towards the front,
+        // alternating the weights 2, 1, 2, ... and adding the decimal
+        // digits of every product.
+        $sum      = 0;
+        $weight   = 2;
+        $position = $count - 2;
+        while ($position >= 0) {
+            $product = $weight * $digits[$position];
+            $sum    += floor($product / 10) + $product % 10;
+            $weight  = $weight % 2 + 1;
+            $position--;
+        }
+
+        $expected = (10 - $sum % 10) % 10;
+        if ($expected != $digits[$count - 1]) {
+            $this->_error(self::CHECKSUM, $digits);
+            return false;
+        }
+
+        return true;
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/StringLength.php

@@ -0,0 +1,253 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: StringLength.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Abstract
+ */
+require_once 'Zend/Validate/Abstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_StringLength extends Zend_Validate_Abstract
+{
+    const INVALID   = 'stringLengthInvalid';
+    const TOO_SHORT = 'stringLengthTooShort';
+    const TOO_LONG  = 'stringLengthTooLong';
+
+    /**
+     * Failure message templates
+     *
+     * @var array
+     */
+    protected $_messageTemplates = array(
+        self::INVALID   => "Invalid type given. String expected",
+        self::TOO_SHORT => "'%value%' is less than %min% characters long",
+        self::TOO_LONG  => "'%value%' is more than %max% characters long",
+    );
+
+    /**
+     * Maps the %min% / %max% message placeholders to properties
+     *
+     * @var array
+     */
+    protected $_messageVariables = array(
+        'min' => '_min',
+        'max' => '_max'
+    );
+
+    /**
+     * Minimum length
+     *
+     * @var integer
+     */
+    protected $_min;
+
+    /**
+     * Maximum length; null means there is no maximum
+     *
+     * @var integer|null
+     */
+    protected $_max;
+
+    /**
+     * Encoding handed to iconv_strlen(); null uses iconv's default
+     *
+     * @var string|null
+     */
+    protected $_encoding;
+
+    /**
+     * Sets validator options
+     *
+     * Accepts an option array / Zend_Config with the keys 'min', 'max'
+     * and 'encoding', or the legacy positional form (min, max, encoding).
+     * A missing 'min' defaults to 0.
+     *
+     * @param  integer|array|Zend_Config $options
+     */
+    public function __construct($options = array())
+    {
+        if ($options instanceof Zend_Config) {
+            $options = $options->toArray();
+        } else if (!is_array($options)) {
+            // Legacy call style: translate positional arguments to named options.
+            $args  = func_get_args();
+            $named = array('min' => array_shift($args));
+            if (!empty($args)) {
+                $named['max'] = array_shift($args);
+            }
+
+            if (!empty($args)) {
+                $named['encoding'] = array_shift($args);
+            }
+
+            $options = $named;
+        }
+
+        $this->setMin(array_key_exists('min', $options) ? $options['min'] : 0);
+
+        if (array_key_exists('max', $options)) {
+            $this->setMax($options['max']);
+        }
+
+        if (array_key_exists('encoding', $options)) {
+            $this->setEncoding($options['encoding']);
+        }
+    }
+
+    /**
+     * Returns the min option
+     *
+     * @return integer
+     */
+    public function getMin()
+    {
+        return $this->_min;
+    }
+
+    /**
+     * Sets the min option; values below zero are stored as 0
+     *
+     * @param  integer $min
+     * @throws Zend_Validate_Exception When $min exceeds the current maximum
+     * @return Zend_Validate_StringLength Provides a fluent interface
+     */
+    public function setMin($min)
+    {
+        $exceedsMax = (null !== $this->_max && $min > $this->_max);
+        if ($exceedsMax) {
+            /**
+             * @see Zend_Validate_Exception
+             */
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception(
+                "The minimum must be less than or equal to the maximum length, but $min > $this->_max"
+            );
+        }
+
+        $this->_min = max(0, (integer) $min);
+        return $this;
+    }
+
+    /**
+     * Returns the max option
+     *
+     * @return integer|null
+     */
+    public function getMax()
+    {
+        return $this->_max;
+    }
+
+    /**
+     * Sets the max option; null removes the upper limit
+     *
+     * @param  integer|null $max
+     * @throws Zend_Validate_Exception When $max is below the current minimum
+     * @return Zend_Validate_StringLength Provides a fluent interface
+     */
+    public function setMax($max)
+    {
+        if (null === $max) {
+            $this->_max = null;
+            return $this;
+        }
+
+        if ($max < $this->_min) {
+            /**
+             * @see Zend_Validate_Exception
+             */
+            require_once 'Zend/Validate/Exception.php';
+            throw new Zend_Validate_Exception(
+                "The maximum must be greater than or equal to the minimum length, but $max < $this->_min"
+            );
+        }
+
+        $this->_max = (integer) $max;
+        return $this;
+    }
+
+    /**
+     * Returns the actual encoding
+     *
+     * @return string|null
+     */
+    public function getEncoding()
+    {
+        return $this->_encoding;
+    }
+
+    /**
+     * Sets a new encoding to use
+     *
+     * A non-null encoding is probed by temporarily installing it as iconv's
+     * internal encoding; the previous setting is restored afterwards.
+     *
+     * @param  string $encoding
+     * @throws Zend_Validate_Exception When iconv rejects the encoding
+     * @return Zend_Validate_StringLength
+     */
+    public function setEncoding($encoding = null)
+    {
+        if (null !== $encoding) {
+            $previous = iconv_get_encoding('internal_encoding');
+            if (!iconv_set_encoding('internal_encoding', $encoding)) {
+                require_once 'Zend/Validate/Exception.php';
+                throw new Zend_Validate_Exception('Given encoding not supported on this OS!');
+            }
+
+            iconv_set_encoding('internal_encoding', $previous);
+        }
+
+        $this->_encoding = $encoding;
+        return $this;
+    }
+
+    /**
+     * Defined by Zend_Validate_Interface
+     *
+     * Returns true if and only if the string length of $value is at least the min option and
+     * no greater than the max option (when the max option is not null).
+     *
+     * @param  string $value
+     * @return boolean
+     */
+    public function isValid($value)
+    {
+        if (!is_string($value)) {
+            $this->_error(self::INVALID);
+            return false;
+        }
+
+        $this->_setValue($value);
+
+        $length = (null !== $this->_encoding)
+                ? iconv_strlen($value, $this->_encoding)
+                : iconv_strlen($value);
+
+        if ($length < $this->_min) {
+            $this->_error(self::TOO_SHORT);
+        }
+
+        if (null !== $this->_max && $this->_max < $length) {
+            $this->_error(self::TOO_LONG);
+        }
+
+        // The result is derived from the recorded messages, not from local flags.
+        return 0 === count($this->_messages);
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Barcode/Royalmail.php

@@ -0,0 +1,121 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Royalmail.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Barcode_AdapterAbstract
+ */
+require_once 'Zend/Validate/Barcode/AdapterAbstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Barcode_Royalmail extends Zend_Validate_Barcode_AdapterAbstract
+{
+    /**
+     * Allowed barcode lengths
+     *
+     * NOTE(review): -1 presumably means "no fixed length"; the value is
+     * interpreted by the parent adapter, which is not visible here - confirm.
+     *
+     * @var integer
+     */
+    protected $_length = -1;
+
+    /**
+     * Allowed barcode characters
+     * @var string
+     */
+    protected $_characters = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ';
+
+    /**
+     * Row weight of every allowed character, used by _royalmail()
+     * @var array
+     */
+    protected $_rows = array(
+        '0' => 1, '1' => 1, '2' => 1, '3' => 1, '4' => 1, '5' => 1,
+        '6' => 2, '7' => 2, '8' => 2, '9' => 2, 'A' => 2, 'B' => 2,
+        'C' => 3, 'D' => 3, 'E' => 3, 'F' => 3, 'G' => 3, 'H' => 3,
+        'I' => 4, 'J' => 4, 'K' => 4, 'L' => 4, 'M' => 4, 'N' => 4,
+        'O' => 5, 'P' => 5, 'Q' => 5, 'R' => 5, 'S' => 5, 'T' => 5,
+        'U' => 0, 'V' => 0, 'W' => 0, 'X' => 0, 'Y' => 0, 'Z' => 0,
+     );
+
+    /**
+     * Column weight of every allowed character, used by _royalmail()
+     * @var array
+     */
+    protected $_columns = array(
+        '0' => 1, '1' => 2, '2' => 3, '3' => 4, '4' => 5, '5' => 0,
+        '6' => 1, '7' => 2, '8' => 3, '9' => 4, 'A' => 5, 'B' => 0,
+        'C' => 1, 'D' => 2, 'E' => 3, 'F' => 4, 'G' => 5, 'H' => 0,
+        'I' => 1, 'J' => 2, 'K' => 3, 'L' => 4, 'M' => 5, 'N' => 0,
+        'O' => 1, 'P' => 2, 'Q' => 3, 'R' => 4, 'S' => 5, 'T' => 0,
+        'U' => 1, 'V' => 2, 'W' => 3, 'X' => 4, 'Y' => 5, 'Z' => 0,
+    );
+
+    /**
+     * Checksum function
+     * @var string
+     */
+    protected $_checksum = '_royalmail';
+
+    /**
+     * Validates the checksum
+     *
+     * The last character of $value is the check character. The row and
+     * column weights of all preceding characters are summed modulo 6, and
+     * the character owning that row/column pair is the expected check
+     * character.
+     *
+     * This method does not strip the '(' / ')' tags that checkChars()
+     * accepts; a value containing any character outside the weight tables
+     * fails the checksum.
+     *
+     * @param  string $value The barcode to validate
+     * @return boolean
+     */
+    protected function _royalmail($value)
+    {
+        $checksum = substr($value, -1, 1);
+        $payload  = (string) substr($value, 0, -1);
+        $values   = ($payload === '') ? array() : str_split($payload);
+        $rowvalue = 0;
+        $colvalue = 0;
+        foreach ($values as $char) {
+            // Characters without a weight cannot be checksummed; fail
+            // instead of raising an undefined-index notice.
+            if (!isset($this->_rows[$char]) || !isset($this->_columns[$char])) {
+                return false;
+            }
+
+            $rowvalue += $this->_rows[$char];
+            $colvalue += $this->_columns[$char];
+        }
+
+        $rowvalue %= 6;
+        $colvalue %= 6;
+
+        $rowchkvalue = array_keys($this->_rows, $rowvalue);
+        $colchkvalue = array_keys($this->_columns, $colvalue);
+        $chkvalue    = current(array_intersect($rowchkvalue, $colchkvalue));
+
+        // PHP stores the keys '0'..'9' as integers, so $chkvalue may be an
+        // int. Compare as strings: a loose int/string comparison makes
+        // 0 == 'A' true on PHP < 8 and would accept any letter whenever the
+        // expected check character is '0'.
+        return (string) $chkvalue === (string) $checksum;
+    }
+
+    /**
+     * Allows start and stop tag within checked chars
+     *
+     * A value starting with '(' must end with ')'; both tags are removed
+     * before the remaining characters are handed to the parent check.
+     *
+     * @param  string $value The barcode to check for allowed characters
+     * @return boolean
+     */
+    public function checkChars($value)
+    {
+        // Guard the offset access: reading $value[0] of an empty string
+        // raises an "uninitialized string offset" notice/warning.
+        if (is_string($value) && $value !== '' && $value[0] == '(') {
+            $value = substr($value, 1);
+
+            // substr() with a negative start is safe on an empty remainder,
+            // unlike indexing its last character.
+            if (substr($value, -1) !== ')') {
+                return false;
+            }
+
+            $value = substr($value, 0, -1);
+        }
+
+        return parent::checkChars($value);
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Barcode/Sscc.php

@@ -0,0 +1,52 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Sscc.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Barcode_AdapterAbstract
+ */
+require_once 'Zend/Validate/Barcode/AdapterAbstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Barcode_Sscc extends Zend_Validate_Barcode_AdapterAbstract
+{
+    /**
+     * Allowed barcode length (a single fixed length of 18)
+     * @var integer
+     */
+    protected $_length = 18;
+
+    /**
+     * Allowed barcode characters (decimal digits only)
+     * @var string
+     */
+    protected $_characters = '0123456789';
+
+    /**
+     * Name of the checksum function
+     * (presumably resolved as a method of the parent adapter - not visible here)
+     * @var string
+     */
+    protected $_checksum = '_gtin';
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Barcode/Ean8.php

@@ -0,0 +1,69 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Ean8.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Barcode_AdapterAbstract
+ */
+require_once 'Zend/Validate/Barcode/AdapterAbstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Barcode_Ean8 extends Zend_Validate_Barcode_AdapterAbstract
+{
+    /**
+     * Allowed barcode lengths
+     * @var array
+     */
+    protected $_length = array(7, 8);
+
+    /**
+     * Allowed barcode characters
+     * @var string
+     */
+    protected $_characters = '0123456789';
+
+    /**
+     * Checksum function
+     * @var string
+     */
+    protected $_checksum = '_gtin';
+
+    /**
+     * Overrides parent checkLength
+     *
+     * Besides delegating the length check, this switches the checksum
+     * flag: off for 7-character values, on for every other length.
+     *
+     * @param string $value Value
+     * @return boolean
+     */
+    public function checkLength($value)
+    {
+        $this->setCheck(strlen($value) != 7);
+
+        return parent::checkLength($value);
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Barcode/Upce.php

@@ -0,0 +1,69 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Upce.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Barcode_AdapterAbstract
+ */
+require_once 'Zend/Validate/Barcode/AdapterAbstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Barcode_Upce extends Zend_Validate_Barcode_AdapterAbstract
+{
+    /**
+     * Allowed barcode lengths
+     * @var array
+     */
+    protected $_length = array(6, 7, 8);
+
+    /**
+     * Allowed barcode characters
+     * @var string
+     */
+    protected $_characters = '0123456789';
+
+    /**
+     * Checksum function
+     * @var string
+     */
+    protected $_checksum = '_gtin';
+
+    /**
+     * Overrides parent checkLength
+     *
+     * Besides delegating the length check, this switches the checksum
+     * flag: on for 8-character values, off for every other length.
+     *
+     * @param string $value Value
+     * @return boolean
+     */
+    public function checkLength($value)
+    {
+        $this->setCheck(strlen($value) == 8);
+
+        return parent::checkLength($value);
+    }
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Barcode/Postnet.php

@@ -0,0 +1,52 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Postnet.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Barcode_AdapterAbstract
+ */
+require_once 'Zend/Validate/Barcode/AdapterAbstract.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Barcode_Postnet extends Zend_Validate_Barcode_AdapterAbstract
+{
+    /**
+     * Allowed barcode lengths (any one of the listed lengths)
+     * @var array
+     */
+    protected $_length = array(6, 7, 10, 12);
+
+    /**
+     * Allowed barcode characters (decimal digits only)
+     * @var string
+     */
+    protected $_characters = '0123456789';
+
+    /**
+     * Name of the checksum function
+     * (presumably resolved as a method of the parent adapter - not visible here)
+     * @var string
+     */
+    protected $_checksum = '_postnet';
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Barcode/Identcode.php

@@ -0,0 +1,52 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Identcode.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Barcode_AdapterAbstract
+ */
+require_once 'Zend/Validate/Barcode/AdapterAbstract.php';
+
+/**
+ * Barcode adapter configuration for the Identcode format.
+ *
+ * The class contains no logic of its own; it only overrides the three
+ * settings (length, characters, checksum routine name) declared by
+ * Zend_Validate_Barcode_AdapterAbstract.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Validate_Barcode_Identcode extends Zend_Validate_Barcode_AdapterAbstract
+{
+    /**
+     * Allowed barcode length (exactly 12 characters)
+     * @var integer
+     */
+    protected $_length = 12;
+
+    /**
+     * Allowed barcode characters (decimal digits only)
+     * @var string
+     */
+    protected $_characters = '0123456789';
+
+    /**
+     * Name of the checksum method to invoke
+     * @var string
+     */
+    protected $_checksum = '_identcode';
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Barcode/AdapterInterface.php

@@ -0,0 +1,68 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: AdapterInterface.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * Contract for barcode adapters: length check, character check, checksum
+ * validation, and a switch that records whether a checksum is in use.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+interface Zend_Validate_Barcode_AdapterInterface
+{
+    /**
+     * Checks the length of a barcode
+     *
+     * @param  string $value  The barcode to check for proper length
+     * @return boolean
+     */
+    public function checkLength($value);
+
+    /**
+     * Checks for allowed characters within the barcode
+     *
+     * @param  string $value The barcode to check for allowed characters
+     * @return boolean
+     */
+    public function checkChars($value);
+
+    /**
+     * Validates the checksum
+     *
+     * @param  string $value The barcode to check the checksum for
+     * @return boolean
+     */
+    public function checksum($value);
+
+    /**
+     * Returns whether the barcode uses a checksum
+     *
+     * @return boolean
+     */
+    public function getCheck();
+
+    /**
+     * Sets the checksum validation
+     *
+     * @param  boolean $check
+     * @return Zend_Validate_Barcode_AdapterInterface Provides fluent interface
+     */
+    public function setCheck($check);
+}

Diggin_Scraper_Adapter_Htmlscraping/trunk/tests/vendor/Zend/Validate/Barcode/AdapterAbstract.php

@@ -0,0 +1,315 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: AdapterAbstract.php 23775 2011-03-01 17:25:24Z ralph $
+ */
+
+/**
+ * @see Zend_Validate_Barcode_AdapterInterface
+ */
+require_once 'Zend/Validate/Barcode/AdapterInterface.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Validate
+ * @copyright  Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+abstract class Zend_Validate_Barcode_AdapterAbstract
+    implements Zend_Validate_Barcode_AdapterInterface
+{
+    /**
+     * Allowed barcode lengths
+     *
+     * As interpreted by checkLength(): a single length, an array of
+     * acceptable lengths, -1 for "any length", or the keyword 'even' or 'odd'.
+     *
+     * @var integer|array|string
+     */
+    protected $_length;
+
+    /**
+     * Allowed barcode characters
+     *
+     * As interpreted by checkChars(): a string listing every permitted
+     * character, or the number 128 to permit chr(0) through chr(127).
+     *
+     * @var string
+     */
+    protected $_characters;
+
+    /**
+     * Callback to checksum function
+     *
+     * checksum() treats this as the name of a method on the adapter itself.
+     *
+     * @var string|array
+     */
+    protected $_checksum;
+
+    /**
+     * Is a checksum value included?
+     * @var boolean
+     */
+    protected $_hasChecksum = true;
+
+    /**
+     * Checks the length of a barcode
+     *
+     * The allowed length returned by getLength() may be a single length, an
+     * array of acceptable lengths, -1 to accept any length, or the keyword
+     * 'even' or 'odd'.
+     *
+     * @param  string $value The barcode to check for proper length
+     * @return boolean True when the length is allowed; false otherwise,
+     *                 including when $value is not a string
+     */
+    public function checkLength($value)
+    {
+        if (!is_string($value)) {
+            return false;
+        }
+
+        $actual  = strlen($value);
+        $allowed = $this->getLength();
+
+        if (is_array($allowed)) {
+            foreach ($allowed as $candidate) {
+                if (($candidate == -1) || ($actual == $candidate)) {
+                    return true;
+                }
+            }
+
+            return false;
+        }
+
+        // The keywords are matched strictly and before any numeric comparison.
+        // On PHP < 8 the loose test 0 == 'odd' is true, which used to let an
+        // empty barcode pass an "odd length" requirement.
+        if ($allowed === 'even') {
+            return ($actual % 2) === 0;
+        }
+
+        if ($allowed === 'odd') {
+            return ($actual % 2) === 1;
+        }
+
+        return ($allowed == -1) || ($actual == $allowed);
+    }
+
+    /**
+     * Checks for allowed characters within the barcode
+     *
+     * Every allowed character is removed from the value; the value passes
+     * when nothing is left over. A character setting equal to 128 stands for
+     * the characters chr(0) through chr(127).
+     *
+     * @param  string $value The barcode to check for allowed characters
+     * @return boolean True when only allowed characters occur; false
+     *                 otherwise, including when $value is not a string
+     */
+    public function checkChars($value)
+    {
+        if (!is_string($value)) {
+            return false;
+        }
+
+        $allowed = $this->getCharacters();
+        if ($allowed == 128) {
+            $strip = array_map('chr', range(0, 127));
+        } else {
+            $strip = str_split($allowed);
+        }
+
+        // str_replace() applies the search entries one after another, which is
+        // the same as removing each allowed character in turn.
+        $leftover = str_replace($strip, '', $value);
+
+        return strlen($leftover) === 0;
+    }
+
+    /**
+     * Validates the checksum
+     *
+     * Looks up the method name returned by getChecksum() on this adapter and
+     * delegates to it. When no name is configured, or no such method exists,
+     * the validation fails.
+     *
+     * @param  string $value The barcode to check the checksum for
+     * @return boolean
+     */
+    public function checksum($value)
+    {
+        $callback = $this->getChecksum();
+        if (empty($callback) || !method_exists($this, $callback)) {
+            return false;
+        }
+
+        return call_user_func(array($this, $callback), $value);
+    }
+
+    /**
+     * Returns the allowed barcode length
+     *
+     * @return integer|array|string The configured $_length value, unchanged
+     */
+    public function getLength()
+    {
+        return $this->_length;
+    }
+
+    /**
+     * Returns the allowed characters
+     *
+     * @return integer|string The configured $_characters value, unchanged
+     */
+    public function getCharacters()
+    {
+        return $this->_characters;
+    }
+
+    /**
+     * Returns the checksum function name
+     *
+     * @return string|array The configured $_checksum value, unchanged
+     */
+    public function getChecksum()
+    {
+        return $this->_checksum;
+    }
+
+    /**
+     * Returns whether the barcode uses a checksum
+     *
+     * @return boolean The current $_hasChecksum flag
+     */
+    public function getCheck()
+    {
+        return $this->_hasChecksum;
+    }
+
+    /**
+     * Sets the checksum validation
+     *
+     * @param  boolean $check Stored in $_hasChecksum after a cast to boolean
+     * @return Zend_Validate_Barcode_AdapterAbstract This adapter, for call chaining
+     */
+    public function setCheck($check)
+    {
+        $this->_hasChecksum = (boolean) $check;
+        return $this;
+    }
+
+    /**
+     * Validates the checksum (Modulo 10)
+     * GTIN implementation factor 3
+     *
+     * The last character of $value is the check digit. The characters before
+     * it are weighted alternately by 3 and 1, starting with 3 at the rightmost
+     * one, and summed. The check digit must equal the amount that brings this
+     * sum up to the next multiple of 10.
+     *
+     * @param  string $value The barcode to validate
+     * @return boolean True when the check digit matches; false otherwise,
+     *                 including for an empty string
+     */
+    protected function _gtin($value)
+    {
+        // An empty barcode has no check digit. Without this guard the final
+        // comparison reads $value[0] out of range, and on PHP < 8 the loose
+        // test '' == 0 then reports the empty barcode as valid.
+        if ($value === '') {
+            return false;
+        }
+
+        $barcode = substr($value, 0, -1);
+        $sum     = 0;
+        $length  = strlen($barcode) - 1;
+
+        // $i counts positions from the right-hand end of the payload.
+        for ($i = 0; $i <= $length; $i++) {
+            $weight = (($i % 2) === 0) ? 3 : 1;
+            $sum   += $barcode[$length - $i] * $weight;
+        }
+
+        $calc     = $sum % 10;
+        $checksum = ($calc === 0) ? 0 : (10 - $calc);
+
+        // $length + 1 is the index of the last character of $value.
+        return $value[$length + 1] == $checksum;
+    }
+
+    /**
+     * Validates the checksum (Modulo 10)
+     * IDENTCODE implementation factors 9 and 4
+     *
+     * @param  string $value The barcode to validate
+     * @return boolean
<