当前位置: 代码迷 >> PHP >> [原创]为找房方便,将sohu焦点的 rss 改成全文的
  详细解决方案

[原创]为找房方便,将sohu焦点的 rss 改成全文的

热度:308   发布时间:2012-09-05 15:19:35.0
[原创]为找房方便 ,将sohu焦点的 rss 改成全文的.
房价貌似快要涨了,俺最近天天加班,由俺媳妇负责找房源。可她那狗屎公司无法上网,只好用 iPad 加载 RSS 离线看,结果狗屎 sohu 焦点的 RSS 不是全文 RSS。为了老婆找房方便,写了点代码,用于生成全文的 RSS,希望借此机会尝试一下代码改善生活的感觉。不废话了,直接贴代码。
PHP code

<?php
// Serve the result as an XML feed. The payload is UTF-8: the article bodies
// are converted to UTF-8 in makeRssItemObj(), and the header below has always
// announced utf-8 (assumes this file itself is saved as UTF-8 -- confirm).
header("Content-type: text/xml; charset=utf-8");
// Every article is downloaded in turn, so allow a long run time.
// (The option name was previously misspelled, which left the limit unchanged.)
ini_set("max_execution_time"  ,3600);
$url = "http://sz.focus.cn/common/xml/rss/news/hot.php";

$newItemArr = parseItems( $url );
// Rebuild the item elements, with each description replaced by the downloaded article body.
$xmlItemContent = makeRssItemString( $newItemArr );

// Fixed head and tail of the RSS document.
// The XML declaration must be the very first bytes of the output, so the
// string starts with it directly (no leading newline), and the declared
// encoding matches the UTF-8 payload.
$xmlPre = '<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="/common/xml/xsl/rss.xsl" media="all"?>
<?xml-stylesheet type="text/css" href="/common/xml/css/rss.css" media="all"?>

<rss version="2.0"
xmlns:focus="http://house.focus.cn"
xmlns:F="http://house.focus.cn"
docType="rss"
F:docType="rss"
>
<channel
pageIndex=""
pageSize="20"
recordCount=""
F:pageIndex=""
F:pageSize="20"
F:recordCount="">
<image>
<title><![CDATA[热点新闻-搜狐焦点网深圳站 ]]></title>
<link>http://sz.focus.cn/newscenter/xwsy.html</link>
<url>http://images.house.focus.cn/img/newhouselogo.gif</url>
</image>
<title>热点新闻-搜狐焦点网深圳站</title>
<link>http://sz.focus.cn/newscenter/xwsy.html</link>
<description><![CDATA[提供最全面最及时的中国房地产新闻资讯 ]]></description>
<copyright>Copyright 2012, sohu.com Inc., all rights reserved</copyright>
<language>zh-cn</language>
<lastBuildDate>Mon, 02 Jul 2012 21:44:21 +0800</lastBuildDate>
<pubDate>Mon, 02 Jul 2012 21:44:21 +0800</pubDate>
<category>地产综合</category>
<ttl>30</ttl>';
$xmlAppend = '</channel></rss>';


$xmlOutput = $xmlPre . $xmlItemContent . $xmlAppend ;
echo $xmlOutput;


/**
 * Load the RSS feed at $url and expand every item it contains.
 *
 * Each channel item is passed through makeRssItemObj(). When the feed cannot
 * be loaded or parsed, or it contains no channel items, an empty array is
 * returned instead of dereferencing a failed result.
 *
 * @param $url  location of the RSS document (anything simplexml_load_file accepts)
 * @return array itemObj  one entry per feed item, in feed order
 */
function parseItems( $url )
{
    $xmlObj = simplexml_load_file( $url );

    // simplexml_load_file() returns false on failure; there is nothing to expand then.
    if ( $xmlObj === false || !isset( $xmlObj->channel->item ) ) {
        return array();
    }

    $resultArr = array();

    foreach( $xmlObj->channel->item as $item ):
        $resultArr []= makeRssItemObj( $item );
    endforeach;

    return $resultArr;
}



/**
 * Replace a feed item's summary with the full article markup.
 *
 * Downloads the page at $item->link, converts it from gb2312 to utf-8 and
 * takes everything between id="newscontent"> and the next
 * <div class="clear"> as the new description. If the download, the charset
 * conversion or the pattern match fails, the item's existing description is
 * left untouched rather than being blanked.
 *
 * @param $item  feed item exposing link and description properties
 * @return itemObj  the same $item that was passed in
 */
function makeRssItemObj( $item )
{
    // fileGetContent() is curl based; where curl is unavailable it could be
    // replaced by file_get_contents or a socket connection.
    // The link is cast because parseItems() passes element objects, not plain strings.
    $urlContent = fileGetContent( (string) $item->link );
    if ( !is_string( $urlContent ) || $urlContent === "" ) {
        return $item;
    }

    // iconv notices are suppressed on purpose (they would be echoed into the
    // feed); a failed conversion is detected through the false return instead.
    $urlContent = @iconv("gb2312","utf-8",$urlContent);
    if ( $urlContent === false ) {
        return $item;
    }

    // Only overwrite the description when the article body was actually found.
    if ( preg_match("/id=\"newscontent\">([\d\D]+)<div\s+class=\"clear\">/iU" , $urlContent , $matchArr) === 1 ) {
        $item->description = $matchArr[1];
    }

    return $item;
}

/**
 * Serialise feed items into a string of RSS item elements.
 *
 * Title and description are wrapped in CDATA sections; link and pubDate are
 * emitted as escaped text. The author is always "mu_rain".
 *
 * @param $itemObjArr  array of objects with title, link, description and pubDate
 * @return string  concatenated item elements, or "" for an empty array
 */
function makeRssItemString( $itemObjArr )
{
   $result = "";

   foreach( $itemObjArr as $itemObj  ):
       // A literal "]]>" in the text would close the CDATA section early,
       // so it is split across two adjacent sections.
       $title       = str_replace("]]>", "]]]]><![CDATA[>", (string) $itemObj->title);
       $description = str_replace("]]>", "]]]]><![CDATA[>", (string) $itemObj->description);

       // Plain element text must be escaped: an unescaped "&" in a URL makes the XML invalid.
       $link    = htmlspecialchars((string) $itemObj->link, ENT_QUOTES, "UTF-8");
       $pubDate = htmlspecialchars((string) $itemObj->pubDate, ENT_QUOTES, "UTF-8");

       $result .= "<item>";
           $result .= "<title><![CDATA[ $title ]]></title>";
           $result .= "<link>$link</link>";
           $result .= "<description><![CDATA[ $description ]]></description>";
           $result .= "<author>mu_rain</author>";
           $result .= "<pubDate>$pubDate</pubDate>";
       // Close the element (this used to emit a second opening tag).
       $result .= "</item>";
   endforeach;
    return  $result;
}


// ------------------------------------
/**
 * Debug helper: print a value inside a styled HTML fieldset.
 *
 * The value is rendered with print_r() inside a pre element. The legend shows
 * $title with the current date and time appended.
 *
 * @param  $array  value to display; a JSON string when $type is 'json'
 * @param  $title  legend text (the timestamp is appended to it)
 * @param  $type   'json' to json_decode $array before printing; anything else prints it as is
 * @param  $width  optional legend width in pixels; empty for no explicit width
 * @package        P
 * @subpackage    String
 * @category    Putils
 * @author        mu_rain
 *  @return mixed  nothing is returned; the markup is written to the output
 */
// ------------------------------------
function pr($array,$title = 'DEBUG',$type = 'array' , $width = '')    {
    $title .= date("Y-m-d H:i:s");
    $widthStr = "";
    if( $width) $widthStr = "width:$width"."px";

    echo "<fieldset style=\"-moz-border-radius:5px 5px 5px 5px; -moz-box-shadow:0px 0px 10px rgba(00,00,00,0.45); border: 3px solid  transparent; padding:3px; margin-top:20px; \"><legend style=\"color: #069; margin:3px; $widthStr \">$title</legend>";
    echo "<div style = '-moz-border-radius:10px 10px 10px 10px;font-size:14px; color:#069; border:1px solid #F0FAF9;  font-size:9pt; background:#F0FAF9; padding:5px;'>";
    print("<pre>");
    if($type == 'json') {
        $array = json_decode($array);
    }
    print_r($array);
    print("</pre>");
    // Close the container opened above (this used to emit another opening div).
    echo "</div>";
    echo  "</fieldset>";
}

// ------------------------------------
/**
 *
 * Fetch the body of a URL with curl, following redirects.
 *
 * Returns the response body with surrounding whitespace trimmed, or an empty
 * string when the handle cannot be created or the transfer fails.
 *
 * @param  $url  address to download; cast to string before use
 * @return string
 * @package    KDG
 * @subpackage    common
 * @category    mu_rain
 * @author     徐兴
 */
// ------------------------------------
function fileGetContent($url)
{
    $ch = curl_init();
    if ($ch === false) {
        return "";
    }

    $timeout = 30;          // seconds allowed for establishing the connection
    $transferTimeout = 60;  // seconds allowed for the whole request, so a stalled server cannot hang the script

    curl_setopt($ch, CURLOPT_URL, (string) $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_TIMEOUT, $transferTimeout);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

    $contents = curl_exec($ch);
    curl_close($ch);

    // curl_exec() returns false on failure; report that as an empty body.
    if ($contents === false) {
        return "";
    }

    return trim($contents);
}