powered by nequal

Diffs

Diggin_Http_Response_CharactorEncoding/trunk/tests/Diggin/Http/Response/CharactorEncodingTest.php

@@ -0,0 +1,169 @@
+<?php
+require_once 'PHPUnit/Framework.php';
+
+require_once 'Diggin/Http/Response/CharactorEncoding.php';
+
+require_once 'Zend/Http/Response.php';
+
+/**
+ * Test class for Diggin_Http_Response_CharactorEncoding
+ * (borrowed from the tests of Diggin_Http_Response_Encoding).
+ */
+class Diggin_Http_Response_CharactorEncodingTest extends PHPUnit_Framework_TestCase
+{
+    /**
+     * @var    Diggin_Http_Response_CharactorEncoding
+     * @access protected
+     */
+    protected $object;
+
+    /**
+     * @var string raw HTTP response header block announcing charset=utf-8
+     */
+    protected $responseHeaderUTF8;
+
+    /**
+     * @var mixed value of mb_detect_order() captured before each test
+     */
+    private $detectOrder;
+
+    /**
+     * Sets up the fixture.
+     * This method is called before a test is executed.
+     *
+     * @access protected
+     */
+    protected function setUp()
+    {
+        // Remember the global mbstring detect order so tearDown() can put it back.
+        $this->detectOrder = mb_detect_order();
+
+        $this->object = new Diggin_Http_Response_CharactorEncoding;
+        $this->responseHeaderUTF8 =
+           "HTTP/1.1 200 OK"        ."\r\n".
+           "Date: Sat, 02 Aug 2008 15:17:11 GMT"."\r\n".
+           "Server: Apache/2.2.6 (Win32) mod_ssl/2.2.6 OpenSSL/0.9.8e PHP/5.2.5"."\r\n".
+           "Last-modified: Sun, 29 Jun 2008 21:20:50 GMT"."\r\n".
+           "Accept-ranges: bytes"   . "\r\n" .
+           "Content-length: 1000"   . "\r\n" .
+           "Connection: close"      . "\r\n" .
+           "Content-type: text/html; charset=utf-8;";
+    }
+
+    /**
+     * Tears down the fixture.
+     * This method is called after a test is executed.
+     *
+     * Restores both pieces of global state a test may have touched: the
+     * mbstring detect order and the static detect order of the class under
+     * test. Without the second reset, a test that fails half-way would leak
+     * its order into testSetDetectOrder(), which asserts the default.
+     *
+     * @access protected
+     */
+    protected function tearDown()
+    {
+        mb_detect_order($this->detectOrder);
+        Diggin_Http_Response_CharactorEncoding::setDetectOrder(false);
+    }
+
+    /**
+     * test "detect" part.1 - only the response body is available
+     */
+    public function testDetectOnlyResponseBody()
+    {
+        $this->assertEquals('Shift_JIS',
+                            $this->object->detect(pack("C2", 0x87, 0x40)));
+
+        //@see http://homepage2.nifty.com/Catra/memo/perl_pack.html
+        $this->assertEquals('EUC-JP',
+                            $this->object->detect(pack("C4", 164, 164, 164, 164)));
+
+        //this source file is encoded in UTF-8.
+        //if the parameter has a non-alphanumeric character, it must be detected as UTF-8
+        $this->assertEquals('UTF-8',
+                            $this->object->detect('あ1ab'));
+    }
+
+    /**
+     * The charset announced by a <meta http-equiv="Content-Type"> tag is used.
+     */
+    public function testDetectWithMetaTag()
+    {
+$body = <<<BODY
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=Shift_JIS">
+    <title>test</title>
+    </head>
+<body>
+</body>
+BODY;
+        $this->assertEquals('Shift_JIS',
+                            $this->object->detect($body));
+    }
+
+    /**
+     * test "detect" part.2 - the Content-Type header wins over the <meta> tag
+     */
+    public function testDetectWithHeadersContentType()
+    {
+        $header = "Content-type: text/html; charset=utf-8;";
+
+        $bodyUTF8 = <<<BODY
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=Shift_JIS">
+    <title>test</title>
+    </head>
+<body>
+</body>
+BODY;
+        $bodySJIS = mb_convert_encoding($bodyUTF8, 'SJIS', 'UTF-8');
+
+        //same precedence a browser applies
+        $this->assertEquals('UTF-8',
+                            $this->object->detect($bodySJIS, $header));
+    }
+
+    /**
+     * detect() must leave the global mb_detect_order() as it found it.
+     */
+    public function testDetect_Restore()
+    {
+        mb_detect_order('UTF-8, SJIS');
+
+        // Deliberately no charset in a header or <meta> tag: only then does
+        // detect() reach its mbstring branch, which is the code that swaps
+        // and restores mb_detect_order().
+        $body = '<html><head><title>test</title></head><body></body></html>';
+
+        Diggin_Http_Response_CharactorEncoding::setDetectOrder('ASCII, SJIS');
+        $this->object->detect($body);
+
+        //restore ok ?
+        $this->assertEquals(array('UTF-8', 'SJIS'), mb_detect_order());
+
+        // tearDown() resets mb_detect_order() and the class's static order.
+    }
+
+    /**
+     * setDetectOrder()/getDetectOrder() round trip, including reset via false.
+     */
+    public function testSetDetectOrder()
+    {
+        $this->assertEquals(Diggin_Http_Response_CharactorEncoding::DETECT_ORDER,
+                            Diggin_Http_Response_CharactorEncoding::getDetectOrder());
+
+        $detectOrder = 'SJIS, UTF-8';
+        Diggin_Http_Response_CharactorEncoding::setDetectOrder($detectOrder);
+
+        $this->assertEquals($detectOrder,
+                            Diggin_Http_Response_CharactorEncoding::getDetectOrder());
+
+        Diggin_Http_Response_CharactorEncoding::setDetectOrder(false);
+
+        $this->assertEquals(Diggin_Http_Response_CharactorEncoding::DETECT_ORDER,
+                            Diggin_Http_Response_CharactorEncoding::getDetectOrder());
+    }
+}
+?>

Diggin_Http_Response_CharactorEncoding/trunk/tests/Diggin/Http/Response/CharactorEncoding/Wrapper/ZfTest.php

@@ -0,0 +1,105 @@
+<?php
+require_once 'PHPUnit/Framework.php';
+
+require_once 'Diggin/Http/Response/CharactorEncoding/Wrapper/Zf.php';
+
+/**
+ * Test class for Diggin_Http_Response_CharactorEncoding_Wrapper_Zf.
+ */
+class Diggin_Http_Response_CharactorEncoding_Wrapper_ZfTest extends PHPUnit_Framework_TestCase
+{
+    /**
+     * @var    Diggin_Http_Response_CharactorEncoding_Wrapper_Zf
+     * @access protected
+     */
+    protected $object;
+
+    /**
+     * Sets up the fixture.
+     * This method is called before a test is executed.
+     *
+     * The wrapper declares no constructor of its own, so it inherits the one
+     * of Zend_Http_Response; createWrapper() shows it being called with
+     * status, headers, body, version and message. A bare `new` without
+     * arguments therefore fails, so a minimal status code and header array
+     * are supplied here.
+     *
+     * @access protected
+     */
+    protected function setUp()
+    {
+        $this->object = new Diggin_Http_Response_CharactorEncoding_Wrapper_Zf(200, array());
+    }
+
+    /**
+     * Tears down the fixture.
+     * This method is called after a test is executed.
+     *
+     * @access protected
+     */
+    protected function tearDown()
+    {
+    }
+
+    /**
+     * createWrapper() copies the response and records both encodings.
+     */
+    public function testCreateWrapper()
+    {
+        $response = new Zend_Http_Response(200, array('Content-type' => 'text/html'), 'body');
+
+        $wrapper = Diggin_Http_Response_CharactorEncoding_Wrapper_Zf::createWrapper($response, 'SJIS', 'EUC-JP');
+
+        $this->assertTrue($wrapper instanceof Diggin_Http_Response_CharactorEncoding_Wrapper_Zf);
+        $this->assertEquals($response->getStatus(), $wrapper->getStatus());
+        $this->assertEquals($response->getRawBody(), $wrapper->getRawBody());
+        $this->assertEquals('SJIS', $wrapper->getEncodingFrom());
+        $this->assertEquals('EUC-JP', $wrapper->getEncodingTo());
+
+        // The target encoding is optional and defaults to UTF-8.
+        $wrapper = Diggin_Http_Response_CharactorEncoding_Wrapper_Zf::createWrapper($response, 'SJIS');
+        $this->assertEquals('UTF-8', $wrapper->getEncodingTo());
+    }
+
+    /**
+     * getBody() returns the body converted from the source to the target encoding.
+     */
+    public function testGetBody()
+    {
+        $sjis = "\x82\xa0";     // HIRAGANA LETTER A in Shift_JIS
+        $utf8 = "\xe3\x81\x82"; // the same character in UTF-8
+
+        $response = new Zend_Http_Response(200, array(), $sjis);
+        $wrapper  = Diggin_Http_Response_CharactorEncoding_Wrapper_Zf::createWrapper($response, 'SJIS', 'UTF-8');
+
+        $this->assertEquals($utf8, $wrapper->getBody());
+    }
+
+    /**
+     * setEncodingFrom() stores the value returned by getEncodingFrom().
+     */
+    public function testSetEncodingFrom()
+    {
+        $this->object->setEncodingFrom('SJIS');
+        $this->assertEquals('SJIS', $this->object->getEncodingFrom());
+    }
+
+    /**
+     * getEncodingFrom() is null until a source encoding has been set.
+     */
+    public function testGetEncodingFrom()
+    {
+        $this->assertNull($this->object->getEncodingFrom());
+
+        $this->object->setEncodingFrom('EUC-JP');
+        $this->assertEquals('EUC-JP', $this->object->getEncodingFrom());
+    }
+
+    /**
+     * setEncodingTo() stores the value returned by getEncodingTo().
+     */
+    public function testSetEncodingTo()
+    {
+        $this->object->setEncodingTo('UTF-8');
+        $this->assertEquals('UTF-8', $this->object->getEncodingTo());
+    }
+
+    /**
+     * getEncodingTo() is null until a target encoding has been set.
+     */
+    public function testGetEncodingTo()
+    {
+        $this->assertNull($this->object->getEncodingTo());
+
+        $this->object->setEncodingTo('SJIS');
+        $this->assertEquals('SJIS', $this->object->getEncodingTo());
+    }
+}
+?>

Diggin_Http_Response_CharactorEncoding/trunk/tests/Diggin/TestHelper.php

@@ -0,0 +1,36 @@
+<?php
+
+
+/**
+ * Include PHPUnit dependencies
+ */
+require_once 'PHPUnit/Framework.php';
+require_once 'PHPUnit/Framework/IncompleteTestError.php';
+require_once 'PHPUnit/Framework/TestCase.php';
+require_once 'PHPUnit/Framework/TestSuite.php';
+require_once 'PHPUnit/Runner/Version.php';
+require_once 'PHPUnit/TextUI/TestRunner.php';
+require_once 'PHPUnit/Util/Filter.php';
+
+// Report everything, including strict-standards notices, while tests run.
+error_reporting(E_ALL | E_STRICT);
+
+
+// Append this package's library/ and tests/ directories (three levels up
+// from this file) to the end of the current include path.
+$digginRoot = dirname(dirname(dirname(__FILE__)));
+$path = array_merge(
+    explode(PATH_SEPARATOR, get_include_path()),
+    array("$digginRoot/library", "$digginRoot/tests")
+);
+set_include_path(implode(PATH_SEPARATOR, $path));
+
+// NOTE: loading of an optional TestConfiguration.php (falling back to
+// TestConfiguration.php.dist) is currently disabled:
+//if (is_readable($digginCoreTests . DIRECTORY_SEPARATOR . 'TestConfiguration.php')) {
+//    require_once $digginCoreTests . DIRECTORY_SEPARATOR . 'Diggin' .DIRECTORY_SEPARATOR.'TestConfiguration.php';
+//} else {
+//    require_once $digginCoreTests . DIRECTORY_SEPARATOR . 'Diggin' . DIRECTORY_SEPARATOR .'TestConfiguration.php.dist';
+//}
+
+// Do not leak the helper variables into the including scope.
+unset($digginRoot, $path);
+
+

Diggin_Http_Response_CharactorEncoding/trunk/library/Diggin/Http/Response/CharactorEncoding.php

@@ -0,0 +1,194 @@
+<?php
+
+/**
+ * Original code borrowed from HTMLScraping
+ *
+ * @see http://www.rcdtokyo.com/etc/htmlscraping/
+ *
+ * ---------------------------------------------------------------------
+ * HTMLScraping class
+ * ---------------------------------------------------------------------
+ * PHP versions 5 (5.1.3 and later)
+ * ---------------------------------------------------------------------
+ * LICENSE: This source file is subject to the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * either version 2.1 of the License, or any later version
+ * that is available through the world-wide-web at the following URI:
+ * http://www.gnu.org/licenses/lgpl.html
+ * If you did not have a copy of the GNU Lesser General Public License
+ * and are unable to obtain it through the web, please write to
+ * the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ---------------------------------------------------------------------
+ */
+
+/**
+ * Diggin - Simplicity PHP Library
+ *
+ * Detects the character encoding of an HTTP response (from the Content-Type
+ * header, a <meta http-equiv="Content-Type"> tag, or mbstring's detection)
+ * and converts text between encodings.
+ *
+ * @category   Diggin
+ * @package    Diggin_Http
+ * @subpackage Response
+ */
+class Diggin_Http_Response_CharactorEncoding
+{
+    /**
+     * Default detect order handed to mb_detect_order().
+     */
+    const DETECT_ORDER = 'ASCII, JIS, UTF-8, EUC-JP, SJIS';
+
+    /**
+     * @var string $_detectOrder detect order currently in effect
+     */
+    private static $_detectOrder = self::DETECT_ORDER;
+
+    /**
+     * Set detect-order (static)
+     *
+     * @param string|false $order detect order, or false to restore DETECT_ORDER
+     */
+    public static function setDetectOrder($order)
+    {
+        self::$_detectOrder = (false === $order) ? self::DETECT_ORDER : $order;
+    }
+
+    /**
+     * Get detecting order
+     *
+     * @return string
+     */
+    public static function getDetectOrder()
+    {
+        return self::$_detectOrder;
+    }
+
+    /**
+     * Convert character encoding - mbstring, or iconv when mbstring is missing
+     *
+     * A string input yields a string and an array input yields an array with
+     * the same keys, whichever extension performs the conversion.
+     *
+     * @param mixed $vars string or array of strings to convert
+     * @param string $encodingfrom
+     * @param string $encodingto
+     * @return mixed converted value(s)
+     * @throws Diggin_Http_Response_CharactorEncoding_Exception when iconv fails
+     */
+    public static function mbconvert($vars, $encodingfrom, $encodingto = 'UTF-8')
+    {
+        if (extension_loaded('mbstring')) {
+            // Converts $vars in place (it is taken by reference).
+            @mb_convert_variables($encodingto, $encodingfrom, $vars);
+            return $vars;
+        }
+
+        // iconv fallback: collect the converted values and return THEM.
+        // Previously the results were written to a scratch array and the
+        // untouched input was returned, wrapped in an array for strings.
+        $wasString = is_string($vars);
+        $converted = array();
+        foreach (($wasString ? array($vars) : $vars) as $key => $value) {
+            $result = @iconv($encodingfrom, $encodingto, $value);
+            if (false === $result) {
+                require_once 'Diggin/Http/Response/CharactorEncoding/Exception.php';
+                throw new Diggin_Http_Response_CharactorEncoding_Exception('Failed converting character encoding.');
+            }
+            $converted[$key] = $result;
+        }
+
+        return $wasString ? $converted[0] : $converted;
+    }
+
+    /**
+     * Create Wrapper instance according to the type of the response object
+     *
+     * @param Object $response currently only Zend_Http_Response is supported
+     * @param string $encodingto
+     * @return mixed
+     * @throws Diggin_Http_Response_CharactorEncoding_Exception for any other object type
+     */
+    public static function createWrapper($response, $encodingto = 'UTF-8')
+    {
+        if (!$response instanceof Zend_Http_Response) {
+            require_once 'Diggin/Http/Response/CharactorEncoding/Exception.php';
+            throw new Diggin_Http_Response_CharactorEncoding_Exception('Unknown Object Type..');
+        }
+
+        // Hand detect() what it documents: the body string plus the
+        // Content-Type header, so a charset announced by the server is
+        // honoured. Previously the response object itself was passed as the
+        // body and the header was never consulted.
+        $contentType = $response->getHeader('content-type');
+        if (is_array($contentType)) {
+            // NOTE(review): getHeader() presumably yields an array when the
+            // header is repeated; the last occurrence is used - confirm.
+            $contentType = end($contentType);
+        }
+        $detect = self::detect($response->getBody(), $contentType);
+
+        require_once 'Diggin/Http/Response/CharactorEncoding/Wrapper/Zf.php';
+        return Diggin_Http_Response_CharactorEncoding_Wrapper_Zf::createWrapper($response, $detect, $encodingto);
+    }
+
+    /**
+     * Detect response's character code name
+     *
+     * Sources are tried in this order: the charset of $contentType, the
+     * charset of the first <meta http-equiv="Content-Type"> tag carrying a
+     * content attribute, and finally mbstring detection using the configured
+     * detect order.
+     *
+     * @param string $responseBody
+     * @param string $contentType value of the Content-Type header, if known
+     * @return string|false $encoding false when nothing was found and mbstring is unavailable
+     * @throws Diggin_Http_Response_CharactorEncoding_Exception when mbstring cannot detect an encoding
+     */
+    public static function detect($responseBody, $contentType = null)
+    {
+        $encoding = false;
+        if (isset($contentType)) {
+            $encoding = self::_getCharsetFromCType($contentType);
+        }
+        if (!$encoding and preg_match_all('/<meta\b[^>]*?>/si', $responseBody, $matches)) {
+            foreach ($matches[0] as $metaTag) {
+                if (strtolower(self::_getAttribute('http-equiv', $metaTag)) != 'content-type') {
+                    continue;
+                }
+                $content = self::_getAttribute('content', $metaTag);
+                if (false !== $content) {
+                    $encoding = self::_getCharsetFromCType($content);
+                    break;
+                }
+            }
+        }
+
+        /*
+         * Use mbstring to detect character encoding if available.
+         */
+        if (!$encoding and extension_loaded('mbstring')) {
+            // mb_detect_encoding() reads the global detect order, so swap in
+            // ours for the call and restore the caller's immediately after.
+            $detectOrder = mb_detect_order();
+            mb_detect_order(self::getDetectOrder());
+            $detected = mb_detect_encoding($responseBody);
+            mb_detect_order($detectOrder);//restore
+
+            // Check the detection result before asking for its MIME name;
+            // passing false on would raise instead of reporting the failure.
+            $encoding = (false === $detected) ? false : self::_preferredMimeName($detected);
+            if (false === $encoding) {
+                require_once 'Diggin/Http/Response/CharactorEncoding/Exception.php';
+                throw new Diggin_Http_Response_CharactorEncoding_Exception('Failed detecting character encoding.');
+            }
+        }
+
+        return $encoding;
+    }
+
+    /**
+     * Get Charset From Ctype
+     *
+     * Scans every ";"-separated parameter after the media type for one named
+     * "charset" (case-insensitive) and strips surrounding quotes from its value.
+     *
+     * @param  string  $string Content-Type value, e.g. "text/html; charset=utf-8"
+     * @return mixed   normalized charset name, or false when none is usable
+     */
+    protected static function _getCharsetFromCType($string)
+    {
+        $params = explode(';', $string);
+        array_shift($params); // the first segment is the media type itself
+
+        foreach ($params as $param) {
+            $pair = explode('=', $param, 2);
+            if (!isset($pair[1]) || strtolower(trim($pair[0])) !== 'charset') {
+                continue;
+            }
+            $charset = trim($pair[1], " \t\r\n\"'");
+            if ($charset === '') {
+                continue;
+            }
+            if (preg_match('/^UTF-?8$/i', $charset)) {
+                return 'UTF-8';
+            } elseif (function_exists('mb_preferred_mime_name')) {
+                return self::_preferredMimeName($charset);
+            } else {
+                return $charset;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Get Attribute from meta-tags
+     *
+     * @param string $name   attribute name, matched case-insensitively
+     * @param string $string markup of a single tag
+     * @return mixed attribute value without surrounding quotes, or false if absent
+     */
+    protected static function _getAttribute($name, $string)
+    {
+        // The name must follow whitespace or a quote; the value may be bare,
+        // single-quoted or double-quoted. The name is escaped so it is always
+        // matched literally.
+        $search = '/[\s\'"]\b' . preg_quote($name, '/')
+                . '\b\s*=\s*([^\s\'">]+|\'[^\']+\'|"[^"]+")/si';
+        if (preg_match($search, $string, $matches)) {
+            return preg_replace('/^\s*[\'\"](.+)[\'\"]\s*$/s', '$1', $matches[1]);
+        } else {
+            return false;
+        }
+    }
+
+    /**
+     * mb_preferred_mime_name() that reports unknown names as false
+     *
+     * Newer PHP versions throw ValueError for an unknown encoding name, where
+     * older ones emit a warning and return false. On versions without that
+     * class the catch clause simply never matches.
+     *
+     * @param string $charset
+     * @return string|false
+     */
+    private static function _preferredMimeName($charset)
+    {
+        try {
+            return @mb_preferred_mime_name($charset);
+        } catch (ValueError $e) {
+            return false;
+        }
+    }
+}

Diggin_Http_Response_CharactorEncoding/trunk/library/Diggin/Http/Response/CharactorEncoding/Wrapper/Zf.php

@@ -0,0 +1,105 @@
+<?php
+
+/**
+ * Diggin - Simplicity PHP Library
+ *
+ * @category   Diggin
+ * @package    Diggin_Http
+ * @subpackage Response_CharactorEncoding
+ */
+
+/** Zend_Http_Response */
+require_once 'Zend/Http/Response.php';
+/** Diggin_Http_Response_CharactorEncoding_Wrapper_WrapperInterface */
+require_once 'Diggin/Http/Response/CharactorEncoding/Wrapper/WrapperInterface.php';
+
+class Diggin_Http_Response_CharactorEncoding_Wrapper_Zf
+    extends Zend_Http_Response implements Diggin_Http_Response_CharactorEncoding_Wrapper_WrapperInterface
+{
+    /**
+     * @var string name of the encoding the body is converted from
+     */
+    private $_encodingFrom;
+
+    /**
+     * @var string name of the encoding the body is converted to
+     */
+    private $_encodingTo;
+
+    /**
+     * Build a wrapper carrying the same status, headers, raw body, version
+     * and message as the given response, plus the two encoding names.
+     *
+     * @param Zend_Http_Response $response
+     * @param string $encoding_from
+     * @param string $encoding_to
+     * @return Diggin_Http_Response_CharactorEncoding_Wrapper_Zf
+     */
+    public static function createWrapper($response, $encoding_from, $encoding_to = 'UTF-8')
+    {
+        $wrapper = new self(
+            $response->getStatus(),
+            $response->getHeaders(),
+            $response->getRawBody(),
+            $response->getVersion(),
+            $response->getMessage()
+        );
+        $wrapper->setEncodingFrom($encoding_from);
+        $wrapper->setEncodingTo($encoding_to);
+
+        return $wrapper;
+    }
+
+    /**
+     * Return the parent's body after converting it from the source encoding
+     * to the target encoding.
+     *
+     * @return string
+     */
+    public function getBody()
+    {
+        // Loaded lazily, only when a body is actually requested.
+        require_once 'Diggin/Http/Response/CharactorEncoding.php';
+
+        return Diggin_Http_Response_CharactorEncoding::mbconvert(
+            parent::getBody(),
+            $this->_encodingFrom,
+            $this->_encodingTo
+        );
+    }
+
+    /**
+     * Set the name of the encoding the body is converted from
+     *
+     * @param string $encoding_from
+     */
+    final public function setEncodingFrom($encoding_from)
+    {
+        $this->_encodingFrom = $encoding_from;
+    }
+
+    /**
+     * Get the name of the encoding the body is converted from
+     *
+     * @return string
+     */
+    final public function getEncodingFrom()
+    {
+        return $this->_encodingFrom;
+    }
+
+    /**
+     * Set the name of the encoding the body is converted to
+     *
+     * @param string $encoding_to
+     */
+    final public function setEncodingTo($encoding_to)
+    {
+        $this->_encodingTo = $encoding_to;
+    }
+
+    /**
+     * Get the name of the encoding the body is converted to
+     *
+     * @return string
+     */
+    final public function getEncodingTo()
+    {
+        return $this->_encodingTo;
+    }
+}

Diggin_Http_Response_CharactorEncoding/trunk/library/Diggin/Http/Response/CharactorEncoding/Wrapper/WrapperInterface.php

@@ -0,0 +1,21 @@
+<?php
+
+/**
+ * Diggin - Simplicity PHP Library
+ *
+ * Contract for response wrappers that carry a source and a target
+ * character encoding for the wrapped response.
+ *
+ * @category   Diggin
+ * @package    Diggin_Http
+ * @subpackage Response_CharactorEncoding
+ */
+interface Diggin_Http_Response_CharactorEncoding_Wrapper_WrapperInterface
+{
+    /**
+     * Create wrapper instance
+     *
+     * @param Zend_Http_Response $response response to wrap
+     * @param string $encoding_from name of the encoding to convert from
+     * @param string $encoding_to name of the encoding to convert to (UTF-8 by default)
+     * @return Diggin_Http_Response_CharactorEncoding_Wrapper_WrapperInterface
+     */
+    public static function createWrapper($response, $encoding_from, $encoding_to = 'UTF-8');
+}