Diffs
Diggin_Http_Response_CharactorEncoding/trunk/tests/Diggin/Http/Response/CharactorEncodingTest.php
@@ -0,0 +1,169 @@
+<?php
+require_once 'PHPUnit/Framework.php';
+
+require_once 'Diggin/Http/Response/CharactorEncoding.php';
+
+require_once 'Zend/Http/Response.php';
+
+/**
+ * Test class for Diggin_Http_Response_CharactorEncoding
+ * (borrowed from the Diggin_Http_Response_Encoding tests).
+ */
+class Diggin_Http_Response_CharactorEncodingTest extends PHPUnit_Framework_TestCase
+{
+    /**
+     * @var Diggin_Http_Response_CharactorEncoding
+     * @access protected
+     */
+    protected $object;
+
+    /**
+     * @var string raw response header block announcing a UTF-8 charset
+     */
+    protected $responseHeaderUTF8;
+
+    /**
+     * @var array mbstring detect order in effect before the test started
+     */
+    private $detectOrder;
+
+    /**
+     * Sets up the fixture.
+     * This method is called before a test is executed.
+     *
+     * @access protected
+     */
+    protected function setUp()
+    {
+        // Remember the global mbstring detect order so tearDown() can put it back.
+        $this->detectOrder = mb_detect_order();
+
+        $this->object = new Diggin_Http_Response_CharactorEncoding;
+        $this->responseHeaderUTF8 =
+            "HTTP/1.1 200 OK" ."\r\n".
+            "Date: Sat, 02 Aug 2008 15:17:11 GMT"."\r\n".
+            "Server: Apache/2.2.6 (Win32) mod_ssl/2.2.6 OpenSSL/0.9.8e PHP/5.2.5"."\r\n".
+            "Last-modified: Sun, 29 Jun 2008 21:20:50 GMT"."\r\n".
+            "Accept-ranges: bytes" . "\r\n" .
+            "Content-length: 1000" . "\r\n" .
+            "Connection: close" . "\r\n" .
+            "Content-type: text/html; charset=utf-8;";
+    }
+
+    /**
+     * Tears down the fixture.
+     * This method is called after a test is executed.
+     *
+     * @access protected
+     */
+    protected function tearDown()
+    {
+        // Both pieces of state are global; reset them even when a test failed
+        // half-way so that later tests start from a known configuration.
+        mb_detect_order($this->detectOrder);
+        Diggin_Http_Response_CharactorEncoding::setDetectOrder(false);
+    }
+
+    /**
+     * detect(), part 1: only a body is given, so the encoding has to be
+     * guessed from the bytes themselves.
+     */
+    public function testDetectOnlyResponseBody()
+    {
+        $this->assertEquals('Shift_JIS',
+            $this->object->detect(pack("C2", 0x87, 0x40)));
+
+        //@see http://homepage2.nifty.com/Catra/memo/perl_pack.html
+        $this->assertEquals('EUC-JP',
+            $this->object->detect(pack("C4", 164, 164, 164, 164)));
+
+        // This source file is encoded as UTF-8.
+        // A parameter containing non-alphanumeric characters must be detected as UTF-8.
+        $this->assertEquals('UTF-8',
+            $this->object->detect('あ1ab'));
+    }
+
+    /**
+     * detect() honours the charset announced by a META http-equiv tag.
+     */
+    public function testDetectWithMetaTag()
+    {
+        $body = <<<BODY
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=Shift_JIS">
+ <title>test</title>
+ </head>
+<body>
+</body>
+BODY;
+        $this->assertEquals('Shift_JIS',
+            $this->object->detect($body));
+    }
+
+    /**
+     * detect(), part 2: a Content-Type header wins over the META tag,
+     * which is how a browser treats the two.
+     */
+    public function testDetectWithHeadersContentType()
+    {
+        $header = "Content-type: text/html; charset=utf-8;";
+
+        $bodyUTF8 = <<<BODY
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=Shift_JIS">
+ <title>test</title>
+ </head>
+<body>
+</body>
+BODY;
+        $bodySJIS = mb_convert_encoding($bodyUTF8, 'SJIS', 'UTF-8');
+
+        $this->assertEquals('UTF-8',
+            $this->object->detect($bodySJIS, $header));
+    }
+
+    /**
+     * detect() must leave the global mbstring detect order as it found it.
+     */
+    public function testDetect_Restore()
+    {
+        mb_detect_order('UTF-8, SJIS');
+
+        Diggin_Http_Response_CharactorEncoding::setDetectOrder('ASCII, SJIS');
+
+        // The body deliberately carries no META tag and no header is passed:
+        // otherwise detect() returns before it touches mb_detect_order() and
+        // this test would not exercise the restore logic at all.
+        $this->assertEquals('Shift_JIS',
+            Diggin_Http_Response_CharactorEncoding::detect(pack("C2", 0x87, 0x40)));
+
+        // The order set by this test, not the one used inside detect(), must be active.
+        $this->assertEquals(array('UTF-8', 'SJIS'), mb_detect_order());
+
+        // tearDown() restores both the mbstring order and the class default.
+    }
+
+    /**
+     * setDetectOrder() stores a custom order and false resets it to the default.
+     */
+    public function testSetDetectOrder()
+    {
+        $this->assertEquals(Diggin_Http_Response_CharactorEncoding::DETECT_ORDER,
+            Diggin_Http_Response_CharactorEncoding::getDetectOrder());
+
+        $detectOrder = 'SJIS, UTF-8';
+        Diggin_Http_Response_CharactorEncoding::setDetectOrder($detectOrder);
+
+        $this->assertEquals($detectOrder,
+            Diggin_Http_Response_CharactorEncoding::getDetectOrder());
+
+        Diggin_Http_Response_CharactorEncoding::setDetectOrder(false);
+
+        $this->assertEquals(Diggin_Http_Response_CharactorEncoding::DETECT_ORDER,
+            Diggin_Http_Response_CharactorEncoding::getDetectOrder());
+    }
+}
+?>
Diggin_Http_Response_CharactorEncoding/trunk/tests/Diggin/Http/Response/CharactorEncoding/Wrapper/ZfTest.php
@@ -0,0 +1,105 @@
+<?php
+require_once 'PHPUnit/Framework.php';
+
+require_once 'Diggin/Http/Response/CharactorEncoding/Wrapper/Zf.php';
+
+/**
+ * Test class for Diggin_Http_Response_CharactorEncoding_Wrapper_Zf.
+ * Generated by PHPUnit on 2009-10-03 at 20:37:07.
+ */
+class Diggin_Http_Response_CharactorEncoding_Wrapper_ZfTest extends PHPUnit_Framework_TestCase
+{
+    /**
+     * @var Diggin_Http_Response_CharactorEncoding_Wrapper_Zf
+     * @access protected
+     */
+    protected $object;
+
+    /**
+     * Sets up the fixture.
+     * This method is called before a test is executed.
+     *
+     * @access protected
+     */
+    protected function setUp()
+    {
+        // The wrapper inherits the Zend_Http_Response constructor, which needs
+        // at least a status code and a header array; calling it without
+        // arguments makes every test error out before it can run.
+        $this->object = new Diggin_Http_Response_CharactorEncoding_Wrapper_Zf(
+            200,
+            array('Content-type' => 'text/html; charset=Shift_JIS'),
+            ''
+        );
+    }
+
+    /**
+     * Tears down the fixture.
+     * This method is called after a test is executed.
+     *
+     * @access protected
+     */
+    protected function tearDown()
+    {
+    }
+
+    /**
+     * createWrapper() copies the response and records both encodings.
+     */
+    public function testCreateWrapper()
+    {
+        $response = new Zend_Http_Response(
+            200,
+            array('Content-type' => 'text/html; charset=Shift_JIS'),
+            'body'
+        );
+
+        $wrapper = Diggin_Http_Response_CharactorEncoding_Wrapper_Zf::createWrapper($response, 'SJIS', 'EUC-JP');
+
+        $this->assertTrue($wrapper instanceof Diggin_Http_Response_CharactorEncoding_Wrapper_Zf);
+        $this->assertEquals($response->getStatus(), $wrapper->getStatus());
+        $this->assertEquals('body', $wrapper->getRawBody());
+        $this->assertEquals('SJIS', $wrapper->getEncodingFrom());
+        $this->assertEquals('EUC-JP', $wrapper->getEncodingTo());
+
+        // The target encoding defaults to UTF-8 when it is omitted.
+        $wrapper = Diggin_Http_Response_CharactorEncoding_Wrapper_Zf::createWrapper($response, 'SJIS');
+        $this->assertEquals('UTF-8', $wrapper->getEncodingTo());
+    }
+
+    /**
+     * getBody() returns the body converted from the source to the target encoding.
+     */
+    public function testGetBody()
+    {
+        // HIRAGANA LETTER A (U+3042) spelled out as bytes so that the test
+        // does not depend on the encoding this file is saved in.
+        $sjis = "\x82\xA0";
+        $utf8 = "\xE3\x81\x82";
+
+        $response = new Zend_Http_Response(
+            200,
+            array('Content-type' => 'text/html; charset=Shift_JIS'),
+            $sjis
+        );
+        $wrapper = Diggin_Http_Response_CharactorEncoding_Wrapper_Zf::createWrapper($response, 'SJIS', 'UTF-8');
+
+        $this->assertEquals($utf8, $wrapper->getBody());
+    }
+
+    /**
+     * setEncodingFrom() stores the source encoding.
+     */
+    public function testSetEncodingFrom()
+    {
+        $this->object->setEncodingFrom('EUC-JP');
+        $this->assertEquals('EUC-JP', $this->object->getEncodingFrom());
+    }
+
+    /**
+     * getEncodingFrom() returns null until a source encoding has been set.
+     */
+    public function testGetEncodingFrom()
+    {
+        $this->assertNull($this->object->getEncodingFrom());
+
+        $this->object->setEncodingFrom('SJIS');
+        $this->assertEquals('SJIS', $this->object->getEncodingFrom());
+    }
+
+    /**
+     * setEncodingTo() stores the target encoding.
+     */
+    public function testSetEncodingTo()
+    {
+        $this->object->setEncodingTo('EUC-JP');
+        $this->assertEquals('EUC-JP', $this->object->getEncodingTo());
+    }
+
+    /**
+     * getEncodingTo() returns null until a target encoding has been set.
+     */
+    public function testGetEncodingTo()
+    {
+        $this->assertNull($this->object->getEncodingTo());
+
+        $this->object->setEncodingTo('UTF-8');
+        $this->assertEquals('UTF-8', $this->object->getEncodingTo());
+    }
+}
+?>
Diggin_Http_Response_CharactorEncoding/trunk/tests/Diggin/TestHelper.php
@@ -0,0 +1,36 @@
+<?php
+
+
+/**
+ * Include PHPUnit dependencies
+ */
+require_once 'PHPUnit/Framework.php';
+require_once 'PHPUnit/Framework/IncompleteTestError.php';
+require_once 'PHPUnit/Framework/TestCase.php';
+require_once 'PHPUnit/Framework/TestSuite.php';
+require_once 'PHPUnit/Runner/Version.php';
+require_once 'PHPUnit/TextUI/TestRunner.php';
+require_once 'PHPUnit/Util/Filter.php';
+
+// Report every problem, strict-standards notices included, while tests run.
+error_reporting(E_ALL | E_STRICT);
+
+// Append this checkout's library/ and tests/ directories to the include path.
+// The checkout root sits three directory levels above this file.
+$digginRoot = dirname(dirname(dirname(__FILE__)));
+set_include_path(
+    get_include_path()
+    . PATH_SEPARATOR . "$digginRoot/library"
+    . PATH_SEPARATOR . "$digginRoot/tests"
+);
+
+// Loading an optional Diggin/TestConfiguration.php (falling back to
+// TestConfiguration.php.dist) is not wired up yet.
+
+// Keep the global scope clean for the test files that include this helper.
+unset($digginRoot, $path);
+
+
+
+
Diggin_Http_Response_CharactorEncoding/trunk/library/Diggin/Http/Response/CharactorEncoding.php
@@ -0,0 +1,194 @@
+<?php
+
+/**
+ * Original code borrowed from HTMLScraping
+ *
+ * @see http://www.rcdtokyo.com/etc/htmlscraping/
+ *
+ * ---------------------------------------------------------------------
+ * HTMLScraping class
+ * ---------------------------------------------------------------------
+ * PHP versions 5 (5.1.3 and later)
+ * ---------------------------------------------------------------------
+ * LICENSE: This source file is subject to the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * either version 2.1 of the License, or any later version
+ * that is available through the world-wide-web at the following URI:
+ * http://www.gnu.org/licenses/lgpl.html
+ * If you did not have a copy of the GNU Lesser General Public License
+ * and are unable to obtain it through the web, please write to
+ * the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ---------------------------------------------------------------------
+ */
+
+/**
+ * Diggin - Simplicity PHP Library
+ *
+ * Detects the character encoding of an HTTP response and converts
+ * response data from that encoding to a target encoding.
+ *
+ * @category   Diggin
+ * @package    Diggin_Http
+ * @subpackage Response
+ */
+class Diggin_Http_Response_CharactorEncoding
+{
+    /**
+     * Default encoding list handed to mb_detect_order() while detecting.
+     */
+    const DETECT_ORDER = 'ASCII, JIS, UTF-8, EUC-JP, SJIS';
+
+    /**
+     * @var string detect-order currently used by detect()
+     */
+    private static $_detectOrder = self::DETECT_ORDER;
+
+    /**
+     * Set detect-order (static)
+     *
+     * @param string|false $order encoding list for mb_detect_order();
+     *                            false restores self::DETECT_ORDER
+     */
+    public static function setDetectOrder($order)
+    {
+        if ($order === false) {
+            self::$_detectOrder = self::DETECT_ORDER;
+        } else {
+            self::$_detectOrder = $order;
+        }
+    }
+
+    /**
+     * Get detecting order
+     *
+     * @return string
+     */
+    public static function getDetectOrder()
+    {
+        return self::$_detectOrder;
+    }
+
+    /**
+     * Convert character encoding - mbstring when loaded, iconv otherwise
+     *
+     * A string comes back as a string and an array comes back as an array
+     * with the same keys, whichever extension does the work.
+     *
+     * @param mixed $vars string or array of strings to convert
+     * @param string $encodingfrom
+     * @param string $encodingto
+     * @return mixed the converted value(s)
+     * @throws Diggin_Http_Response_CharactorEncoding_Exception when iconv fails
+     */
+    public static function mbconvert($vars, $encodingfrom, $encodingto = 'UTF-8')
+    {
+        if (extension_loaded('mbstring')) {
+            // Converts $vars in place (by reference). A failed conversion is
+            // deliberately best-effort here: $vars is returned unchanged.
+            @mb_convert_variables($encodingto, $encodingfrom, $vars);
+            return $vars;
+        }
+
+        return self::_iconvConvert($vars, $encodingfrom, $encodingto);
+    }
+
+    /**
+     * iconv fallback for mbconvert(): converts a string, or every string
+     * found in a (possibly nested) array; other values pass through untouched.
+     *
+     * @param mixed $value
+     * @param string $encodingfrom
+     * @param string $encodingto
+     * @return mixed
+     * @throws Diggin_Http_Response_CharactorEncoding_Exception
+     */
+    private static function _iconvConvert($value, $encodingfrom, $encodingto)
+    {
+        if (is_array($value)) {
+            foreach ($value as $key => $item) {
+                $value[$key] = self::_iconvConvert($item, $encodingfrom, $encodingto);
+            }
+            return $value;
+        }
+
+        if (!is_string($value)) {
+            return $value;
+        }
+
+        $converted = @iconv($encodingfrom, $encodingto, $value);
+        if (false === $converted) {
+            require_once 'Diggin/Http/Response/CharactorEncoding/Exception.php';
+            throw new Diggin_Http_Response_CharactorEncoding_Exception('Failed converting character encoding.');
+        }
+
+        return $converted;
+    }
+
+    /**
+     * Create Wrapper instance according to the type of the response object
+     *
+     * @param Object $response
+     * @param string $encodingto
+     * @return mixed
+     * @throws Diggin_Http_Response_CharactorEncoding_Exception for unsupported response types
+     */
+    public static function createWrapper($response, $encodingto = 'UTF-8')
+    {
+        if ($response instanceof Zend_Http_Response) {
+            // detect() expects the body text plus the Content-Type value, not
+            // the response object itself.
+            $contentType = $response->getHeader('content-type');
+            if (is_array($contentType)) {
+                // Header sent more than once: use the first occurrence.
+                $contentType = reset($contentType);
+            }
+            $detect = self::detect(
+                $response->getBody(),
+                is_string($contentType) ? $contentType : null
+            );
+            require_once 'Diggin/Http/Response/CharactorEncoding/Wrapper/Zf.php';
+            return Diggin_Http_Response_CharactorEncoding_Wrapper_Zf::createWrapper($response, $detect, $encodingto);
+        } else {
+            require_once 'Diggin/Http/Response/CharactorEncoding/Exception.php';
+            throw new Diggin_Http_Response_CharactorEncoding_Exception('Unknown Object Type..');
+        }
+    }
+
+    /**
+     * Detect response's character code name
+     *
+     * Sources are tried in this order: the Content-Type value passed in, a
+     * META http-equiv="content-type" tag in the body, then mbstring detection
+     * (when the extension is loaded).
+     *
+     * @param string $responseBody
+     * @param string $contentType
+     * @return string|false $encoding false only when nothing was declared
+     *                                and mbstring is not available
+     * @throws Diggin_Http_Response_CharactorEncoding_Exception when mbstring
+     *         cannot determine the encoding
+     */
+    public static function detect($responseBody, $contentType = null)
+    {
+        $encoding = false;
+        if (isset($contentType)) {
+            $encoding = self::_getCharsetFromCType($contentType);
+        }
+
+        if (!$encoding and preg_match_all('/<meta\b[^>]*?>/si', $responseBody, $matches)) {
+            foreach ($matches[0] as $tag) {
+                $httpEquiv = self::_getAttribute('http-equiv', $tag);
+                if (false === $httpEquiv or strtolower($httpEquiv) != 'content-type') {
+                    continue;
+                }
+                $content = self::_getAttribute('content', $tag);
+                if (false === $content) {
+                    continue;
+                }
+                $encoding = self::_getCharsetFromCType($content);
+                if ($encoding) {
+                    break;
+                }
+            }
+        }
+
+        /*
+         * Use mbstring to detect character encoding if available.
+         */
+        if (!$encoding and extension_loaded('mbstring')) {
+            $detectOrder = mb_detect_order();
+            mb_detect_order(self::getDetectOrder());
+            $detected = mb_detect_encoding($responseBody);
+            // Restore the caller's order before anything below can throw.
+            mb_detect_order($detectOrder);
+
+            // Check the detection result first: passing false on to
+            // mb_preferred_mime_name() is an error in its own right.
+            if (false === $detected or false === $encoding = mb_preferred_mime_name($detected)) {
+                require_once 'Diggin/Http/Response/CharactorEncoding/Exception.php';
+                throw new Diggin_Http_Response_CharactorEncoding_Exception('Failed detecting character encoding.');
+            }
+        }
+
+        return $encoding;
+    }
+
+    /**
+     * Get Charset From Ctype
+     *
+     * Finds the charset parameter wherever it appears among the parameters
+     * and accepts a quoted value (charset="utf-8").
+     *
+     * @param string $string Content-Type value, e.g. "text/html; charset=utf-8"
+     * @return mixed charset name, or false when none is usable
+     */
+    protected static function _getCharsetFromCType($string)
+    {
+        if (!preg_match('/\bcharset\s*=\s*["\']?([^"\'\s;]+)/i', $string, $matches)) {
+            return false;
+        }
+
+        $charset = $matches[1];
+        if (preg_match('/^UTF-?8$/i', $charset)) {
+            return 'UTF-8';
+        }
+        if (!function_exists('mb_preferred_mime_name')) {
+            return $charset;
+        }
+
+        try {
+            return @mb_preferred_mime_name($charset);
+        } catch (ValueError $e) {
+            // PHP 8 throws for an unknown encoding name instead of returning
+            // false; on older versions this class does not exist and the
+            // clause simply never matches.
+            return false;
+        }
+    }
+
+    /**
+     * Get Attribute from meta-tags
+     *
+     * @param string $name attribute name to look for
+     * @param string $string markup of a single tag
+     * @return mixed attribute value without surrounding quotes, or false
+     */
+    protected static function _getAttribute($name, $string)
+    {
+        // The pattern uses ' as its delimiter, hence the second preg_quote() argument.
+        $search = "'[\s\'\"]\b" . preg_quote($name, "'") . "\b\s*=\s*([^\s\'\">]+|\'[^\']+\'|\"[^\"]+\")'si";
+        if (!preg_match($search, $string, $matches)) {
+            return false;
+        }
+
+        return preg_replace('/^\s*[\'\"](.+)[\'\"]\s*$/s', '$1', $matches[1]);
+    }
+}
Diggin_Http_Response_CharactorEncoding/trunk/library/Diggin/Http/Response/CharactorEncoding/Wrapper/Zf.php
@@ -0,0 +1,105 @@
+<?php
+
+/**
+ * Diggin - Simplicity PHP Library
+ *
+ * @category Diggin
+ * @package Diggin_Http
+ * @subpackage Response_CharactorEncoding
+ */
+
+/** Zend_Http_Response */
+require_once 'Zend/Http/Response.php';
+/** Diggin_Http_Response_CharactorEncoding_Wrapper_WrapperInterface */
+require_once 'Diggin/Http/Response/CharactorEncoding/Wrapper/WrapperInterface.php';
+
+class Diggin_Http_Response_CharactorEncoding_Wrapper_Zf
+    extends Zend_Http_Response
+    implements Diggin_Http_Response_CharactorEncoding_Wrapper_WrapperInterface
+{
+    /**
+     * @var string name of the encoding the body is converted from
+     */
+    private $_encodingFrom;
+
+    /**
+     * @var string name of the encoding the body is converted to
+     */
+    private $_encodingTo;
+
+    /**
+     * Build a wrapper from an existing response.
+     *
+     * Status, headers, raw body, version and message are read from
+     * $response and passed to the constructor of the new instance; the two
+     * encoding names are then stored on it.
+     *
+     * @param Zend_Http_Response $response
+     * @param string $encoding_from
+     * @param string $encoding_to
+     * @return Diggin_Http_Response_CharactorEncoding_Wrapper_Zf
+     */
+    public static function createWrapper($response, $encoding_from, $encoding_to = 'UTF-8')
+    {
+        $wrapper = new self(
+            $response->getStatus(),
+            $response->getHeaders(),
+            $response->getRawBody(),
+            $response->getVersion(),
+            $response->getMessage()
+        );
+        $wrapper->setEncodingFrom($encoding_from);
+        $wrapper->setEncodingTo($encoding_to);
+
+        return $wrapper;
+    }
+
+    /**
+     * Return the parent's body after passing it through
+     * Diggin_Http_Response_CharactorEncoding::mbconvert() with the stored
+     * source and target encodings.
+     *
+     * @return string
+     */
+    public function getBody()
+    {
+        // Loaded on demand, only when a body is actually requested.
+        require_once 'Diggin/Http/Response/CharactorEncoding.php';
+
+        return Diggin_Http_Response_CharactorEncoding::mbconvert(
+            parent::getBody(),
+            $this->getEncodingFrom(),
+            $this->getEncodingTo()
+        );
+    }
+
+    /**
+     * Store the name of the encoding to convert from.
+     *
+     * @param string $encoding_from
+     */
+    final public function setEncodingFrom($encoding_from)
+    {
+        $this->_encodingFrom = $encoding_from;
+    }
+
+    /**
+     * Return the name of the encoding to convert from.
+     *
+     * @return string
+     */
+    final public function getEncodingFrom()
+    {
+        return $this->_encodingFrom;
+    }
+
+    /**
+     * Store the name of the encoding to convert to.
+     *
+     * @param string $encoding_to
+     */
+    final public function setEncodingTo($encoding_to)
+    {
+        $this->_encodingTo = $encoding_to;
+    }
+
+    /**
+     * Return the name of the encoding to convert to.
+     *
+     * @return string
+     */
+    final public function getEncodingTo()
+    {
+        return $this->_encodingTo;
+    }
+}
Diggin_Http_Response_CharactorEncoding/trunk/library/Diggin/Http/Response/CharactorEncoding/Wrapper/WrapperInterface.php
@@ -0,0 +1,21 @@
+<?php
+
+/**
+ * Diggin - Simplicity PHP Library
+ *
+ * @category Diggin
+ * @package Diggin_Http
+ * @subpackage Response_CharactorEncoding
+ */
+interface Diggin_Http_Response_CharactorEncoding_Wrapper_WrapperInterface
+{
+ /**
+ * Create a wrapper instance for the given response.
+ *
+ * Static factory that every response wrapper has to provide.
+ *
+ * @param Zend_Http_Response $response response object to wrap
+ * @param string $encoding_from name of the encoding to convert from
+ * @param string $encoding_to name of the encoding to convert to; defaults to 'UTF-8'
+ * @return Diggin_Http_Response_CharactorEncoding_Wrapper_WrapperInterface
+ */
+ public static function createWrapper($response, $encoding_from, $encoding_to = 'UTF-8');
+}