Sim
Well-known member
- Affected version
- 2.x
This is not actually a bug, but a minor change to the core code would greatly assist in extending functionality for one of my addons.
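May I please request a new function, getCharset, be added to XF\Http\Reader (or somewhere else suitable)?
This code is used both in the core and in the XFMG addon, so it is a good candidate for a bit of DRY treatment.
Then XF\BbCode\ProcessorAction\AutoLink::fetchUrlHtml and XFMG\EmbedData\BaseData::getTitleAndDescription can both be changed to call it, and I can easily modify the functionality of the new XF\Http\Reader::getCharset function in one place.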
Pretty please?
May I please request a new function be added to
XF\Http\Reader
(or somewhere else suitable):
PHP:
/**
 * Extracts the charset declared in the Content-type header of an HTML response.
 *
 * @param string|string[]|null $contentType Raw Content-type header value. An array of
 *     header values (as PSR-7 style getHeader() implementations return) is also
 *     accepted; only its first entry is inspected.
 *
 * @return string|null|false The declared charset; null when the header is empty or
 *     declares no charset; false when the media type is not text/html.
 */
public function getCharset($contentType)
{
    if (is_array($contentType))
    {
        // reset() yields false for an empty array, which casts to '' and is
        // then treated the same as a missing header below.
        $contentType = (string) reset($contentType);
    }

    if (!$contentType)
    {
        return null;
    }

    $parts = explode(';', $contentType, 2);
    if (trim($parts[0]) != 'text/html')
    {
        // Deliberately distinct from null so that callers can tell "not HTML"
        // apart from "HTML without a declared charset".
        return false;
    }

    if (isset($parts[1]) && preg_match('/charset=([-a-z0-9_]+)/i', trim($parts[1]), $match))
    {
        return $match[1];
    }

    return null;
}
This code is used both in the core and in the XFMG addon - so is a good candidate for a bit of DRY treatment.
So then
XF\BbCode\ProcessorAction\AutoLink::fetchUrlHtml
, which is currently:
PHP:
/**
 * Requests a URL through the untrusted HTTP reader and returns the start of
 * its body together with the charset named in the Content-type header.
 *
 * @param string $requestUrl URL to request.
 *
 * @return array|false ['body' => ..., 'charset' => string|null], or false when
 *     there is no response, the status is not 200, or the Content-type header
 *     names a type other than text/html.
 */
protected function fetchUrlHtml($requestUrl)
{
    $reader = $this->app->http()->reader();
    $limits = [
        'time' => 5,
        'bytes' => 1.5 * 1024 * 1024
    ];

    $response = $reader->getUntrusted($requestUrl, $limits);
    if (!$response || $response->getStatusCode() != 200)
    {
        return false;
    }

    $declaredCharset = null;
    $header = $response->getHeader('Content-type');
    if ($header)
    {
        // Split "type; parameters" into at most two segments.
        $segments = explode(';', $header, 2);
        if (trim($segments[0]) != 'text/html')
        {
            return false;
        }

        $parameters = isset($segments[1]) ? trim($segments[1]) : null;
        if ($parameters !== null && preg_match('/charset=([-a-z0-9_]+)/i', $parameters, $found))
        {
            $declaredCharset = $found[1];
        }
    }

    return [
        'body' => $response->getBody()->read(50 * 1024),
        'charset' => $declaredCharset
    ];
}
... can be changed to:
PHP:
/**
 * Requests a URL through the untrusted HTTP reader and returns the start of
 * its body together with the charset named in the Content-type header.
 *
 * @param string $requestUrl URL to request.
 *
 * @return array|false ['body' => ..., 'charset' => string|null], or false when
 *     there is no response, the status is not 200, or the reader reports that
 *     the Content-type is not HTML.
 */
protected function fetchUrlHtml($requestUrl)
{
    $reader = $this->app->http()->reader();

    $response = $reader->getUntrusted(
        $requestUrl,
        [
            'time' => 5,
            'bytes' => 1.5 * 1024 * 1024
        ]
    );
    if (!$response || $response->getStatusCode() != 200)
    {
        return false;
    }

    $contentType = $response->getHeader('Content-type');
    $charset = $reader->getCharset($contentType);

    // getCharset() gives a string or null for HTML responses; any other value
    // is its "not text/html" signal. The inlined code this replaces bailed out
    // at that point, so keep doing so instead of returning a non-HTML body.
    if ($charset !== null && !is_string($charset))
    {
        return false;
    }

    return [
        'body' => $response->getBody()->read(50 * 1024),
        'charset' => $charset
    ];
}
... and
XFMG\EmbedData\BaseData::getTitleAndDescription
, which is currently:
PHP:
/**
 * Fetches a URL and extracts a title and description from its HTML, converted
 * towards UTF-8 and cleaned.
 *
 * $bbCodeMediaSiteId and $siteMediaId are accepted but not read in this body.
 *
 * @return array [] when there is no response, the status is not 200 or the
 *     Content-type is not text/html; otherwise an array with 'title' and
 *     'description' keys (both '' when nothing was found).
 */
public function getTitleAndDescription($url, $bbCodeMediaSiteId, $siteMediaId)
{
$response = $this->app->http()->reader()->getUntrusted(
$url,
[
'time' => 5,
'bytes' => 1.5 * 1024 * 1024
]
);
if (!$response || $response->getStatusCode() != 200)
{
return [];
}
// Charset taken from the Content-type header, if it declares one.
$charset = null;
$contentType = $response->getHeader('Content-type');
if ($contentType)
{
// Split "type; parameters" into at most two parts.
$parts = explode(';', $contentType, 2);
$type = trim($parts[0]);
if ($type != 'text/html')
{
return [];
}
if (isset($parts[1]) && preg_match('/charset=([-a-z0-9_]+)/i', trim($parts[1]), $match))
{
$charset = $match[1];
}
}
// Only the first 50 * 1024 units of the body are read and searched.
$body = $response->getBody()->read(50 * 1024);
$output = [
'title' => '',
'description' => ''
];
// Prefer an og:title / twitter:title meta property ...
if (preg_match('#<meta[^>]+property="(og:|twitter:)title"[^>]*content="([^">]+)"#siU', $body, $match))
{
$output['title'] = isset($match[2]) ? $match[2] : '';
}
// ... and fall back to the <title> element when that produced nothing.
if (!$output['title'] && preg_match('#<title[^>]*>(.*)</title>#siU', $body, $match))
{
$output['title'] = $match[1];
}
// Description from a meta name/property of description, og:description or twitter:description.
if (preg_match('#<[\s]*meta[\s]*(name|property)="(og:|twitter:|)description"?[\s]*content="?([^>"]*)"?[\s]*[\/]?[\s]*>#simU', $body, $match))
{
$output['description'] = $match[3];
}
// Nothing found: return the two empty strings without any charset work.
if (!$output['title'] && !$output['description'])
{
return $output;
}
// No charset from the header: look for a charset= declaration in the body itself.
if (!$charset)
{
preg_match('/charset=([^;"\\s]+|"[^;"]+")/i', $body, $contentTypeMatch);
if (isset($contentTypeMatch[1]))
{
// Strip surrounding whitespace and double quotes from the captured value.
$charset = trim($contentTypeMatch[1], " \t\n\r\0\x0B\"");
}
if (!$charset)
{
// Last-resort default when neither header nor body names a charset.
$charset = 'windows-1252';
}
}
// Clean the string and convert charset where applicable.
return array_map(function($string) use ($charset)
{
if (!$string)
{
return '';
}
$string = \XF::cleanString($string);
// note: assumes charset is ascii compatible
// Conversion is only attempted when the string contains bytes >= 0x80.
if (preg_match('/[\x80-\xff]/', $string))
{
$newString = false;
if (function_exists('iconv'))
{
$newString = @iconv($charset, 'utf-8//IGNORE', $string);
}
// mb_convert_encoding is tried when iconv is unavailable or gave an empty/false result.
if (!$newString && function_exists('mb_convert_encoding'))
{
$newString = @mb_convert_encoding($string, 'utf-8', $charset);
}
// If neither conversion produced anything, drop the high-bit bytes instead.
$string = ($newString ? $newString : preg_replace('/[\x80-\xff]/', '', $string));
// NOTE(review): utf8_unhtml is defined elsewhere; presumably it decodes HTML entities - confirm.
$string = utf8_unhtml($string, true);
// Remove a byte in F0-F7 plus the next three bytes, then a byte in F8-FB plus the next four.
// NOTE(review): presumably strips 4- and 5-byte UTF-8 sequences - confirm.
$string = preg_replace('/[\xF0-\xF7].../', '', $string);
$string = preg_replace('/[\xF8-\xFB]..../', '', $string);
}
$string = html_entity_decode($string, ENT_QUOTES | ENT_HTML5, 'UTF-8');
$string = utf8_unhtml($string);
$string = \XF::cleanString($string);
if (!strlen($string))
{
return '';
}
return $string;
}, $output);
}
... can be changed to:
PHP:
/**
 * Fetches a URL and extracts a title and description from its HTML, converted
 * towards UTF-8 and cleaned.
 *
 * $bbCodeMediaSiteId and $siteMediaId are accepted but not read in this body.
 *
 * @return array [] when there is no response, the status is not 200 or the
 *     reader reports that the Content-type is not HTML; otherwise an array with
 *     'title' and 'description' keys (both '' when nothing was found).
 */
public function getTitleAndDescription($url, $bbCodeMediaSiteId, $siteMediaId)
{
    $reader = $this->app->http()->reader();

    $response = $reader->getUntrusted(
        $url,
        [
            'time' => 5,
            'bytes' => 1.5 * 1024 * 1024
        ]
    );
    if (!$response || $response->getStatusCode() != 200)
    {
        return [];
    }

    $contentType = $response->getHeader('Content-type');
    $charset = $reader->getCharset($contentType);

    // getCharset() gives a string or null for HTML responses; any other value
    // is its "not text/html" signal. The inlined code this replaces returned []
    // at that point, so keep doing so instead of parsing a non-HTML body.
    if ($charset !== null && !is_string($charset))
    {
        return [];
    }

    $body = $response->getBody()->read(50 * 1024);
    $output = [
        'title' => '',
        'description' => ''
    ];

    // Prefer an og:title / twitter:title meta property ...
    if (preg_match('#<meta[^>]+property="(og:|twitter:)title"[^>]*content="([^">]+)"#siU', $body, $match))
    {
        $output['title'] = isset($match[2]) ? $match[2] : '';
    }
    // ... and fall back to the <title> element when that produced nothing.
    if (!$output['title'] && preg_match('#<title[^>]*>(.*)</title>#siU', $body, $match))
    {
        $output['title'] = $match[1];
    }
    if (preg_match('#<[\s]*meta[\s]*(name|property)="(og:|twitter:|)description"?[\s]*content="?([^>"]*)"?[\s]*[\/]?[\s]*>#simU', $body, $match))
    {
        $output['description'] = $match[3];
    }

    // Nothing found: return the two empty strings without any charset work.
    if (!$output['title'] && !$output['description'])
    {
        return $output;
    }

    // No charset from the header: look for a charset= declaration in the body,
    // and default to windows-1252 if that also yields nothing.
    if (!$charset)
    {
        preg_match('/charset=([^;"\\s]+|"[^;"]+")/i', $body, $contentTypeMatch);
        if (isset($contentTypeMatch[1]))
        {
            $charset = trim($contentTypeMatch[1], " \t\n\r\0\x0B\"");
        }
        if (!$charset)
        {
            $charset = 'windows-1252';
        }
    }

    // Clean the string and convert charset where applicable.
    return array_map(function($string) use ($charset)
    {
        if (!$string)
        {
            return '';
        }

        $string = \XF::cleanString($string);

        // note: assumes charset is ascii compatible
        // Conversion is only attempted when the string contains bytes >= 0x80.
        if (preg_match('/[\x80-\xff]/', $string))
        {
            $newString = false;
            if (function_exists('iconv'))
            {
                // Best-effort conversion: warnings for unknown charsets are suppressed on purpose.
                $newString = @iconv($charset, 'utf-8//IGNORE', $string);
            }
            if (!$newString && function_exists('mb_convert_encoding'))
            {
                $newString = @mb_convert_encoding($string, 'utf-8', $charset);
            }

            // If neither conversion produced anything, drop the high-bit bytes instead.
            $string = ($newString ? $newString : preg_replace('/[\x80-\xff]/', '', $string));
            $string = utf8_unhtml($string, true);
            // Remove a byte in F0-F7 plus the next three bytes, then a byte in F8-FB plus the next four.
            $string = preg_replace('/[\xF0-\xF7].../', '', $string);
            $string = preg_replace('/[\xF8-\xFB]..../', '', $string);
        }

        $string = html_entity_decode($string, ENT_QUOTES | ENT_HTML5, 'UTF-8');
        $string = utf8_unhtml($string);
        $string = \XF::cleanString($string);

        if (!strlen($string))
        {
            return '';
        }

        return $string;
    }, $output);
}
... and then I can easily modify the functionality of the new
XF\Http\Reader::getCharset
function to cater for Guzzle 6, which returns an array of Content-type headers rather than a string, without needing to copy and paste massive amounts of code into all of the children of XFMG\EmbedData\BaseData
because you can't extend a class (using XF class extensions) which is itself extended by other classes.
Pretty please?