Codeigniter

 

다음 소스를 이용

https://github.com/tristangoossens/php-sitemap-generator

 

 

sitemap-config.php

<?php

return array(
    // Site to build the sitemap for.
    // Syntax: https://www.your-domain-name.com/ or http://www.your-domain-name.com/
    "SITE_URL" => "https://macaronics.net/",

    // Boolean for crawling external links.
    // Example: Domain = https://www.student-laptop.nl, Link = https://www.google.com -> with false, the google.com link is not crawled.
    "ALLOW_EXTERNAL_LINKS" => false,

    // Boolean for crawling element id links.
    // Example: <a href="#section"> will not be crawled when this option is set to false
    "ALLOW_ELEMENT_LINKS" => false,

    // If set the crawler will only index the anchor tags with the given id.
    // If you wish to crawl all links set the value to ""
    // Example: <a id="internal-link" href="/info"> -> when CRAWL_ANCHORS_WITH_ID is set to "internal-link" this link will be crawled
    // but <a id="other-link" href="/about"> will not be crawled.
    "CRAWL_ANCHORS_WITH_ID" => "",

    // Array with absolute links or keywords for the pages to skip when crawling the given SITE_URL.
    //  https://student-laptop.nl/info/laptops or you can just input student-laptop.nl/info/ and it will not crawl anything in that directory
    // Try to be as specific as you can so you dont skip 300 pages
    "KEYWORDS_TO_SKIP" => array(),

    // Location + filename where the sitemap will be saved.
    "SAVE_LOC" => "sitemap.xml",

    // Static priority value for sitemap
    "PRIORITY" => 1,

    // Static update frequency
    "CHANGE_FREQUENCY" => "daily",

    // Date changed (today's date)
    "LAST_UPDATED" => date('Y-m-d'),
);

 

 

 

sitemap-generator.php

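<?php

class SitemapGenerator
{
	// Options array returned by sitemap-config.php
	private $config;
	// URLs already visited by the crawler
	private $scanned;
	// Scheme and host of SITE_URL
	private $site_url_base;
	// Write handle of the sitemap file (SAVE_LOC)
	private $sitemap_file;

	public function __construct($conf)
	{
		$this->config = $conf;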
		$this->scanned = [];
		$this->site_url_base = parse_url($this->config['SITE_URL'])['scheme'] . "://" . parse_url($this->config['SITE_URL'])['host'];
		$this->sitemap_file = fopen($this->config['SAVE_LOC'], "w");
	}

	/**
	 * Writes the sitemap file from a list of page URLs supplied by the caller.
	 *
	 * The crawl step is commented out below, so nothing is fetched here;
	 * both lists are handed straight to generateFile().
	 *
	 * @param array $list_url  Page URLs to list in the sitemap
	 * @param array $list_date Dates, passed through to generateFile() together with $list_url
	 */
	public function GenerateSitemap($list_url, $list_date)
	{
		// Crawling from the start url is disabled:
		// $this->crawlPage($this->config['SITE_URL']);

		$this->generateFile($list_url, $list_date);
	}

	/**
	 * Downloads a page with cURL and parses the response into a DOM document.
	 *
	 * @param string $url Address of the page to fetch
	 * @return DOMDocument The parsed page, or an empty document when the
	 *                     request fails or the response body is empty
	 */
	private function getHtml($url)
	{
		// Fetch the page body as a string
		$curl = curl_init();
		curl_setopt($curl, CURLOPT_URL, $url);
		curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
		$html = curl_exec($curl);
		curl_close($curl);

		$dom = new DOMDocument();

		// curl_exec() yields false on failure, and loadHTML() rejects an
		// empty string; hand back an empty document so callers simply see
		// a page without anchors.
		if (!is_string($html) || $html === '') {
			return $dom;
		}

		// Real-world HTML is rarely valid; collect libxml's parse warnings
		// instead of printing them, then restore the previous setting.
		$previous = libxml_use_internal_errors(true);
		$dom->loadHTML($html);
		libxml_clear_errors();
		libxml_use_internal_errors($previous);

		return $dom;
	}

	/**
	 * Recursively follows the anchor tags of a page and records every
	 * visited URL in $this->scanned.
	 *
	 * Links are filtered according to the config options
	 * CRAWL_ANCHORS_WITH_ID, ALLOW_ELEMENT_LINKS, ALLOW_EXTERNAL_LINKS and
	 * KEYWORDS_TO_SKIP before the function calls itself on them.
	 *
	 * @param string $page_url URL of the page to crawl
	 * @return void
	 */
	private function crawlPage($page_url)
	{
		$url = filter_var($page_url, FILTER_SANITIZE_URL);

		// Stop on invalid urls and on pages that were already scanned
		if (!filter_var($page_url, FILTER_VALIDATE_URL) || in_array($url, $this->scanned, true)) {
			return;
		}

		// Store the sanitized form, i.e. the same form the lookup above uses
		$this->scanned[] = $url;

		// Values that do not change while looping over the anchors
		$base_page_url = explode("?", $page_url)[0];
		$site_host = parse_url($this->config['SITE_URL'], PHP_URL_HOST);
		$wanted_id = (string) $this->config['CRAWL_ANCHORS_WITH_ID'];

		$html = $this->getHtml($url);

		// Loop through all anchor tags on the page
		foreach ($html->getElementsByTagName('a') as $a) {
			$next_url = $a->getAttribute('href');

			// When an anchor id is configured, only anchors carrying exactly
			// that id are followed; every other anchor is skipped.
			if ($wanted_id !== "" && $a->getAttribute('id') !== $wanted_id) {
				continue;
			}

			if (!$this->config['ALLOW_ELEMENT_LINKS']) {
				// Skip the url if it starts with a # or is equal to root
				if (substr($next_url, 0, 1) == "#" || $next_url == "/") {
					continue;
				}
			}

			// parse_url() returns false for malformed urls; isset() copes with that
			$parsed_url = parse_url($next_url);

			// mailto:, tel:, javascript: and similar links are not pages
			if (isset($parsed_url['scheme']) && !in_array(strtolower($parsed_url['scheme']), ['http', 'https'], true)) {
				continue;
			}

			// Skip links to other hosts unless external links are allowed
			if (!$this->config['ALLOW_EXTERNAL_LINKS']) {
				if (isset($parsed_url['host']) && $parsed_url['host'] != $site_host) {
					continue;
				}
			}

			// Turn relative links into absolute ones
			if (substr($next_url, 0, 7) != "http://" && substr($next_url, 0, 8) != "https://") {
				$next_url = $this->convertRelativeToAbsolute($base_page_url, $next_url);
			}

			// Skip the link when it contains one of the configured keywords.
			// strpos() returns 0 for a match at the very start, so it has to
			// be compared against false explicitly.
			$found = false;
			foreach ($this->config['KEYWORDS_TO_SKIP'] as $skip) {
				if ($skip !== "" && strpos($next_url, $skip) !== false) {
					$found = true;
					break;
				}
			}

			// Call the function again with the new URL
			if (!$found) {
				$this->crawlPage($next_url);
			}
		}
	}

	/**
	 * Builds an absolute link out of a relative one.
	 *
	 * Example: "/articles" becomes "https://student-laptop.nl/articles".
	 *
	 * @param string $page_base_url URL of the current page without its query string
	 * @param string $link          Relative link taken from an anchor tag
	 * @return string The absolute link
	 */
	private function convertRelativeToAbsolute($page_base_url, $link)
	{
		$lead = substr($link, 0, 1);

		// Query-only and fragment-only links are appended to the current page
		if ($lead === "?" || $lead === "#") {
			return $page_base_url . $link;
		}

		// Everything else is attached to the site base; supply the slash
		// when the link does not bring its own
		$separator = ($lead === "/") ? "" : "/";

		return $this->site_url_base . $separator . $link;
	}

	/**
	 * Writes the sitemap XML for the given pages to the sitemap file and
	 * closes the file.
	 *
	 * NOTE(review): the XML markup of the original listing was lost, so the
	 * element names below (urlset/url/loc/lastmod/changefreq/priority) follow
	 * the sitemaps.org 0.9 protocol - confirm against the deployed file.
	 *
	 * @param array $pages Page URLs, one sitemap entry each
	 * @param array $date  Last-modified dates, matched to $pages by position;
	 *                     a missing or empty entry falls back to the
	 *                     LAST_UPDATED config value
	 * @return void
	 */
	private function generateFile($pages , $date)
	{
		$namespace = 'http://www.sitemaps.org/schemas/sitemap/0.9';

		// Re-index both lists so they can be walked by position
		$pages = array_values((array) $pages);
		$dates = array_values((array) $date);

		$dom = new DOMDocument('1.0', 'UTF-8');
		$dom->formatOutput = true;

		$urlset = $dom->createElementNS($namespace, 'urlset');
		$dom->appendChild($urlset);

		foreach ($pages as $i => $page) {
			$has_date = isset($dates[$i]) && $dates[$i] !== '';

			$fields = [
				'loc'        => $page,
				'lastmod'    => $has_date ? $dates[$i] : $this->config['LAST_UPDATED'],
				'changefreq' => $this->config['CHANGE_FREQUENCY'],
				'priority'   => $this->config['PRIORITY'],
			];

			$entry = $dom->createElementNS($namespace, 'url');
			foreach ($fields as $tag => $value) {
				// Text nodes escape &, < and > on output, so no manual
				// string replacement is required
				$element = $dom->createElementNS($namespace, $tag);
				$element->appendChild($dom->createTextNode((string) $value));
				$entry->appendChild($element);
			}
			$urlset->appendChild($entry);
		}

		// fopen() in the constructor may have failed; do not write to a bad handle
		if (!is_resource($this->sitemap_file)) {
			trigger_error('Sitemap file ' . $this->config['SAVE_LOC'] . ' is not open for writing', E_USER_WARNING);
			return;
		}

		// Write XML to file and close it
		fwrite($this->sitemap_file, $dom->saveXML());
		fclose($this->sitemap_file);
	}
}

 

 

 

 

모델에서 다음과 같이 게시글 목록을 가져온다.

	/**
	 * Reads the rows that go into the sitemap: at most 500 rows of ci_board,
	 * highest board_id first.
	 *
	 * Every row carries board_id, board_subject and reg_date, the latter
	 * formatted by the query with DATE_FORMAT(reg_date, '%Y-%m-%dT%TZ').
	 *
	 * @return mixed Whatever result() of the query object returns
	 */
	function selectSitemapList(){
		$query = $this->db->query(
			"select board_id, DATE_FORMAT(reg_date, '%Y-%m-%dT%TZ') AS reg_date, board_subject from ci_board   order by board_id desc Limit 0 , 500"
		);

		return $query->result();
	}
	

 

컨트롤러

   $sitemapList =$this->auth_m->selectSitemapList();
         $sitemapUrl=$this->config->base_url()."index.php";
         $list_url = [];
         $list_date = [];
         foreach( $sitemapList as $board){
             //다음과 같은 형식의 sitemapurl 생성  :  https://macaronics.net/index.php/m01/jsp/view/1738   
             $sitemap_URL= $sitemapUrl."/".url_name($board->board_subject)."/".$board->board_subject."/view/".$board->board_id;            
             array_push($list_url, $sitemap_URL);
             array_push($list_date, $board->reg_date);
         }
         
         include "/home1/macaronics/public_html/aaa/sitemap-generator.php";
         $smg = new SitemapGenerator(include("/home1/macaronics/public_html/aaa/sitemap-config.php"));
         $smg->GenerateSitemap($list_url,$list_date);

 

 

 

=>xml 파일로 생성

/**
 * Writes the sitemap XML for the given pages to the sitemap file and
 * closes the file.
 *
 * NOTE(review): the XML markup of the original listing was lost, so the
 * element names below (urlset/url/loc/lastmod/changefreq/priority) follow
 * the sitemaps.org 0.9 protocol - confirm against the deployed file.
 *
 * @param array $pages Page URLs, one sitemap entry each
 * @param array $date  Last-modified dates, matched to $pages by position;
 *                     a missing or empty entry falls back to the
 *                     LAST_UPDATED config value
 * @return void
 */
private function generateFile($pages , $date)
	{
		$namespace = 'http://www.sitemaps.org/schemas/sitemap/0.9';

		// Re-index both lists so they can be walked by position
		$pages = array_values((array) $pages);
		$dates = array_values((array) $date);

		$dom = new DOMDocument('1.0', 'UTF-8');
		$dom->formatOutput = true;

		$urlset = $dom->createElementNS($namespace, 'urlset');
		$dom->appendChild($urlset);

		foreach ($pages as $i => $page) {
			$has_date = isset($dates[$i]) && $dates[$i] !== '';

			$fields = [
				'loc'        => $page,
				'lastmod'    => $has_date ? $dates[$i] : $this->config['LAST_UPDATED'],
				'changefreq' => $this->config['CHANGE_FREQUENCY'],
				'priority'   => $this->config['PRIORITY'],
			];

			$entry = $dom->createElementNS($namespace, 'url');
			foreach ($fields as $tag => $value) {
				// Text nodes escape &, < and > on output, so no manual
				// string replacement is required
				$element = $dom->createElementNS($namespace, $tag);
				$element->appendChild($dom->createTextNode((string) $value));
				$entry->appendChild($element);
			}
			$urlset->appendChild($entry);
		}

		// fopen() in the constructor may have failed; do not write to a bad handle
		if (!is_resource($this->sitemap_file)) {
			trigger_error('Sitemap file ' . $this->config['SAVE_LOC'] . ' is not open for writing', E_USER_WARNING);
			return;
		}

		// Write XML to file and close it
		fwrite($this->sitemap_file, $dom->saveXML());
		fclose($this->sitemap_file);
	}

 

 

작업 스케줄(cron)로 등록하든지

http://www.ciboard.co.kr/tiptech/p/340

https://www.glennstovall.com/writing-cron-job-in-codeigniter/

 

특정 페이지에 코드를 삽입해서 주기적으로 sitemap.xml을 생성한다.

혹은, 게시판 등록시 코드를 삽입한다.

 

 

 

 

about author

PHRASE

Level 1  라이트

댓글 ( 4)

댓글 남기기

작성