File "crawler.cls.php"

Full Path: /home/ycoalition/public_html/blog/wp-admin/js/widgets/plugins/litespeed-cache/src/crawler.cls.php
File size: 29.59 KB
MIME-type: text/x-php
Charset: utf-8

<?php
/**
 * The crawler class
 *
 * @since      	1.1.0
 */
namespace LiteSpeed;
defined( 'WPINC' ) || exit;

class Crawler extends Root {
	const TYPE_REFRESH_MAP = 'refresh_map';
	const TYPE_EMPTY = 'empty';
	const TYPE_BLACKLIST_EMPTY = 'blacklist_empty';
	const TYPE_BLACKLIST_DEL = 'blacklist_del';
	const TYPE_BLACKLIST_ADD = 'blacklist_add';
	const TYPE_START = 'start';
	const TYPE_RESET = 'reset';

	const USER_AGENT = 'lscache_walker';
	const FAST_USER_AGENT = 'lscache_runner';
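	// Number of URLs pulled from the crawler map per batch in _do_running()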
	const CHUNKS = 10000;

	private $_sitemeta = 'meta.data';
	private $_resetfile;
	private $_end_reason;

	private $_crawler_conf = array(
		'cookies' => array(),
		'headers' => array(),
		'ua'	=> '',
	);
	private $_crawlers = array();
	private $_cur_threads = -1;
	private $_max_run_time;
	private $_cur_thread_time;
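	// Crawl result buckets keyed by status: H = hit, M = miss, B = blacklisted, N = no-cache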
	private $_map_status_list = array(
		'H'	=> array(),
		'M'	=> array(),
		'B'	=> array(),
		'N'	=> array(),
	);
	protected $_summary;

	/**
	 * Initialize crawler, assign sitemap path
	 *
	 * @since    1.1.0
	 */
	public function __construct() {
		if ( is_multisite() ) {
			$this->_sitemeta = 'meta' . get_current_blog_id() . '.data';
		}

		$this->_resetfile = LITESPEED_STATIC_DIR . '/crawler/' . $this->_sitemeta . '.reset';

		$this->_summary = self::get_summary();

		Debug2::debug( '🐞 Init' );
	}

	/**
	 * Check whether the given crawler is active, i.e. not in the stored bypass list
	 *
	 * @since  4.3
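	 *
	 * Example (illustrative): is_active( 2 ) returns false when crawler index 2
	 * is in the stored bypass_list option.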
	 */
	public function is_active( $curr ){
		$bypass_list = self::get_option( 'bypass_list' , array() );
		return ! in_array( $curr, $bypass_list );
	}

	/**
	 * Toggle a crawler's active state (add it to or remove it from the bypass list) and return the updated state
	 *
	 * @since  4.3
	 * @param  int $curr Crawler index
	 */
	public function toggle_activeness( $curr ) {
		$bypass_list = self::get_option( 'bypass_list' , array() );
		if ( in_array( $curr, $bypass_list ) ) { // currently bypassed (off): remove it from the list to turn it on
			unset( $bypass_list[ array_search( $curr, $bypass_list ) ] );
			$bypass_list = array_values( $bypass_list );
			self::update_option( 'bypass_list' , $bypass_list );
			return true;
		} else { // currently active (on): add it to the bypass list to turn it off
			$bypass_list[] = ( int ) $curr;
			self::update_option( 'bypass_list' , $bypass_list );
			return false;
		}
	}

	/**
	 * Clear bypassed list
	 *
	 * @since  4.3
	 * @access public
	 */
	public function clear_disabled_list() {
		self::update_option( 'bypass_list', array() );

		$msg = __( 'Crawler disabled list is cleared! All crawlers are set to active! ', 'litespeed-cache' );
		Admin_Display::note( $msg );

		Debug2::debug( '🐞 All crawlers are set to active...... ' );
	}

	/**
	 * Override get_summary to initialize default elements
	 *
	 * @since  3.0
	 * @access public
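	 *
	 * Example (illustrative): get_summary( 'last_pos' ) returns that single field
	 * (or null if the field is unknown), while get_summary() returns the full
	 * summary array merged with the defaults below.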
	 */
	public static function get_summary( $field = false ) {
		$_default = array(
			'list_size'			=> 0,
			'last_update_time'	=> 0,
			'curr_crawler'		=> 0,
			'curr_crawler_beginning_time'	=> 0,
			'last_pos'			=> 0,
			'last_count'		=> 0,
			'last_crawled'		=> 0,
			'last_start_time'	=> 0,
			'last_status'		=> '',
			'is_running'		=> 0,
			'end_reason'		=> '',
			'meta_save_time'	=> 0,
			'pos_reset_check'	=> 0,
			'done'				=> 0,
			'this_full_beginning_time'	=> 0,
			'last_full_time_cost'		=> 0,
			'last_crawler_total_cost'	=> 0,
			'crawler_stats'		=> array(), // this will store all crawlers hit/miss crawl status
		);

		$summary = parent::get_summary();
		$summary = array_merge( $_default, $summary );

		if ( ! $field ) {
			return $summary;
		}

		if ( array_key_exists( $field, $summary ) ) {
			return $summary[ $field ];
		}

		return null;
	}

	/**
	 * Override save_summary
	 *
	 * @since  3.0
	 * @access public
	 */
	public static function save_summary( $data = null ) {
		$instance = self::cls();
		$instance->_summary[ 'meta_save_time' ] = time();

		if ( $data === null ) {
			$data = $instance->_summary;
		}

		parent::save_summary( $data );

		File::save( LITESPEED_STATIC_DIR . '/crawler/' . $instance->_sitemeta, json_encode( $data ), true );
	}

	/**
	 * Proceed crawling
	 *
	 * @since    1.1.0
	 * @access public
	 */
	public static function start( $force = false ) {
		if ( ! Router::can_crawl() ) {
			Debug2::debug( '🐞 ......crawler is NOT allowed by the server admin......' );
			return false;
		}

		if ( $force ) {
			Debug2::debug( '🐞 ......crawler manually ran......' );
		}

		self::cls()->_crawl_data( $force );
	}

	/**
	 * Crawling start
	 *
	 * @since    1.1.0
	 * @access   private
	 */
	private function _crawl_data( $force ) {
		Debug2::debug( '🐞 ......crawler started......' );
		// for the first time running
		if ( ! $this->_summary || ! Data::cls()->tb_exist( 'crawler' ) || ! Data::cls()->tb_exist( 'crawler_blacklist' ) ) {
			$this->cls( 'Crawler_Map' )->gen();
		}

		// if finished last time, regenerate sitemap
		if ( $this->_summary['done'] === 'touchedEnd' ) {
			// check whole crawling interval
			$last_finished_at = $this->_summary[ 'last_full_time_cost' ] + $this->_summary[ 'this_full_beginning_time' ];
			if ( ! $force && time() - $last_finished_at < $this->conf( Base::O_CRAWLER_CRAWL_INTERVAL ) ) {
				Debug2::debug( '🐞 Cron abort: cache warmed already.' );
				// if not reach whole crawling interval, exit
				return;
			}
			Debug2::debug( '🐞 TouchedEnd. regenerate sitemap....' );
			$this->cls( 'Crawler_Map' )->gen();
		}

		$this->list_crawlers();

		// Skip the crawlers that are in the bypass list
		while ( ! $this->is_active( $this->_summary[ 'curr_crawler' ] ) && $this->_summary[ 'curr_crawler' ] < count( $this->_crawlers ) ) {
			Debug2::debug( '🐞 Skipped the Crawler #' . $this->_summary[ 'curr_crawler' ] . ' ......' );
			$this->_summary[ 'curr_crawler' ]++;
		}
		if ( $this->_summary[ 'curr_crawler' ] >= count( $this->_crawlers ) ) {
			$this->_end_reason = 'end';
			$this->_terminate_running();
			return;
		}

		// In case all crawlers are done but the list has not been reloaded, reload it
		if ( empty( $this->_summary[ 'curr_crawler' ] ) || empty( $this->_crawlers[ $this->_summary[ 'curr_crawler' ] ] ) ) {
			$this->_summary[ 'curr_crawler' ] = 0;
			$this->_summary[ 'crawler_stats' ][ $this->_summary[ 'curr_crawler' ] ] = array();
		}

		$this->load_conf();

		$this->_engine_start();
	}

	/**
	 * Load conf before running crawler
	 *
	 * @since  3.0
	 * @access private
	 */
	private function load_conf() {
		$this->_crawler_conf[ 'base' ] = home_url();

		$current_crawler = $this->_crawlers[ $this->_summary[ 'curr_crawler' ] ];

		/**
		 * Set role simulation
		 * @since 1.9.1
		 */
		if ( ! empty( $current_crawler[ 'uid' ] ) ) {
			// Get role simulation vary name
			$vary_name = $this->cls( 'Vary' )->get_vary_name();
			$vary_val = $this->cls( 'Vary' )->finalize_default_vary( $current_crawler[ 'uid' ] );
			$this->_crawler_conf[ 'cookies' ][ $vary_name ] = $vary_val;
			$this->_crawler_conf[ 'cookies' ][ 'litespeed_role' ] = $current_crawler[ 'uid' ];
		}

		/**
		 * Check cookie crawler
		 * @since  2.8
		 */
		foreach ( $current_crawler as $k => $v ) {
			if ( strpos( $k, 'cookie:') !== 0 ) {
				continue;
			}

			if ( $v == '_null' ) {
				continue;
			}

			$this->_crawler_conf[ 'cookies' ][ substr( $k, 7 ) ] = $v;
		}

		/**
		 * Set WebP simulation
		 * @since  1.9.1
		 */
		if ( ! empty( $current_crawler[ 'webp' ] ) ) {
			$this->_crawler_conf[ 'headers' ][] = 'Accept: image/webp,*/*';
		}

		/**
		 * Set mobile crawler
		 * @since  2.8
		 */
		if ( ! empty( $current_crawler[ 'mobile' ] ) ) {
			$this->_crawler_conf[ 'ua' ] = 'Mobile iPhone';
		}

		/**
		 * Limit delay to use server setting
		 * @since 1.8.3
		 */
		$this->_crawler_conf[ 'run_delay' ] = $this->conf( Base::O_CRAWLER_USLEEP ); // microseconds
		if ( ! empty( $_SERVER[ Base::ENV_CRAWLER_USLEEP ] ) && $_SERVER[ Base::ENV_CRAWLER_USLEEP ] > $this->_crawler_conf[ 'run_delay' ] ) {
			$this->_crawler_conf[ 'run_delay' ] = $_SERVER[ Base::ENV_CRAWLER_USLEEP ];
		}

		$this->_crawler_conf[ 'run_duration' ] = $this->conf( Base::O_CRAWLER_RUN_DURATION );

		$this->_crawler_conf[ 'load_limit' ] = $this->conf( Base::O_CRAWLER_LOAD_LIMIT );
		if ( ! empty( $_SERVER[ Base::ENV_CRAWLER_LOAD_LIMIT_ENFORCE ] ) ) {
			$this->_crawler_conf[ 'load_limit' ] = $_SERVER[ Base::ENV_CRAWLER_LOAD_LIMIT_ENFORCE ];
		}
		elseif ( ! empty( $_SERVER[ Base::ENV_CRAWLER_LOAD_LIMIT ] ) && $_SERVER[ Base::ENV_CRAWLER_LOAD_LIMIT ] < $this->_crawler_conf[ 'load_limit' ] ) {
			$this->_crawler_conf[ 'load_limit' ] = $_SERVER[ Base::ENV_CRAWLER_LOAD_LIMIT ];
		}

	}

	/**
	 * Start crawler
	 *
	 * @since  1.1.0
	 * @access private
	 */
	private function _engine_start() {
		// check if is running
		if ( $this->_summary['is_running'] && time() - $this->_summary['is_running'] < $this->_crawler_conf[ 'run_duration' ] ) {
			$this->_end_reason = 'stopped';
			Debug2::debug( '🐞 The crawler is running.' );
			return;
		}

		// check current load
		$this->_adjust_current_threads();
		if ( $this->_cur_threads == 0 ) {
			$this->_end_reason = 'stopped_highload';
			Debug2::debug( '🐞 Stopped due to heavy load.' );
			return;
		}

		// log started time
		$this->_summary['last_start_time'] = time();
		self::save_summary();

		// set time limit
		$maxTime = (int) ini_get( 'max_execution_time' );
		Debug2::debug( '🐞 ini_get max_execution_time=' . $maxTime );
		if ( $maxTime == 0 ) {
			$maxTime = 300; // hard limit when PHP reports no execution time limit
		}
		else {
			$maxTime -= 5;
		}
		if ( $maxTime >= $this->_crawler_conf[ 'run_duration' ] ) {
			$maxTime = $this->_crawler_conf[ 'run_duration' ];
			Debug2::debug( '🐞 Use run_duration setting as max_execution_time=' . $maxTime );
		}
		elseif ( ini_set( 'max_execution_time', $this->_crawler_conf[ 'run_duration' ] + 15 ) !== false ) {
			$maxTime = $this->_crawler_conf[ 'run_duration' ];
			Debug2::debug( '🐞 ini_set max_execution_time=' . $maxTime );
		}
		Debug2::debug( '🐞 final max_execution_time=' . $maxTime );
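		// Absolute deadline (now + allowed seconds); _do_running() stops once this timestamp is passed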
		$this->_max_run_time = $maxTime + time();

		// mark running
		$this->_prepare_running();
		// run the crawler
		$this->_do_running();
		$this->_terminate_running();
	}

	/**
	 * Adjust threads dynamically
	 *
	 * @since  1.1.0
	 * @access private
	 */
	private function _adjust_current_threads() {
		/**
		 * If sys_getloadavg() is unavailable (e.g. on Windows), bail out with 0 threads
		 * @see  https://wordpress.org/support/topic/crawler-keeps-causing-crashes/
		 */
		if ( ! function_exists( 'sys_getloadavg' ) ) {
			Debug2::debug( '🐞 set threads=0 due to func sys_getloadavg not exist!' );
			$this->_cur_threads = 0;
			return;
		}

		$load = sys_getloadavg();
		$curload = $load[ 0 ]; // 1-minute load average

		if ( $this->_cur_threads == -1 ) {
			// init
			if ( $curload > $this->_crawler_conf[ 'load_limit' ] ) {
				$curthreads = 0;
			}
			elseif ( $curload >= ( $this->_crawler_conf[ 'load_limit' ] - 1 ) ) {
				$curthreads = 1;
			}
			else {
				$curthreads = intval( $this->_crawler_conf[ 'load_limit' ] - $curload );
				if ( $curthreads > $this->conf( Base::O_CRAWLER_THREADS ) ) {
					$curthreads = $this->conf( Base::O_CRAWLER_THREADS );
				}
			}
		}
		else {
			// adjust
			$curthreads = $this->_cur_threads;
			if ( $curload >= $this->_crawler_conf[ 'load_limit' ] + 1 ) {
				sleep( 5 );  // sleep 5 secs
				if ( $curthreads >= 1 ) {
					$curthreads --;
				}
			}
			elseif ( $curload >= $this->_crawler_conf[ 'load_limit' ] ) {
				if ( $curthreads > 1 ) {// if already 1, keep
					$curthreads --;
				}
			}
			elseif ( ($curload + 1) < $this->_crawler_conf[ 'load_limit' ] ) {
				if ( $curthreads < $this->conf( Base::O_CRAWLER_THREADS ) ) {
					$curthreads ++;
				}
			}
		}

		// $log = 'set current threads = ' . $curthreads . ' previous=' . $this->_cur_threads
		// 	. ' max_allowed=' . $this->conf( Base::O_CRAWLER_THREADS ) . ' load_limit=' . $this->_crawler_conf[ 'load_limit' ] . ' current_load=' . $curload;

		$this->_cur_threads = $curthreads;
		$this->_cur_thread_time = time();
	}

	/**
	 * Mark running status
	 *
	 * @since  1.1.0
	 * @access private
	 */
	private function _prepare_running() {
		$this->_summary[ 'is_running' ] = time();
		$this->_summary[ 'done' ] = 0;// reset done status
		$this->_summary[ 'last_status' ] = 'prepare running';
		$this->_summary[ 'last_crawled' ] = 0;

		// Mark the current crawler's start time
		if ( $this->_summary[ 'last_pos' ] == 0 ) {
			$this->_summary[ 'curr_crawler_beginning_time' ] = time();
		}

		if ( $this->_summary[ 'curr_crawler' ] == 0 && $this->_summary[ 'last_pos' ] == 0 ) {
			$this->_summary[ 'this_full_beginning_time' ] = time();
			$this->_summary[ 'list_size' ] = $this->cls( 'Crawler_Map' )->count_map();
		}

		if ( $this->_summary[ 'end_reason' ] == 'end' && $this->_summary[ 'last_pos' ] == 0 ) {
			$this->_summary[ 'crawler_stats' ][ $this->_summary[ 'curr_crawler' ] ] = array();
		}

		self::save_summary();
	}

	/**
	 * Run crawler
	 *
	 * @since  1.1.0
	 * @access private
	 */
	private function _do_running() {
		$options = $this->_get_curl_options( true );

		while ( $urlChunks = $this->cls( 'Crawler_Map' )->list_map( self::CHUNKS, $this->_summary['last_pos'] ) ) {
			// start crawling
			$urlChunks = array_chunk( $urlChunks, $this->_cur_threads );
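			// Each batch holds _cur_threads URLs, so one multi-curl round issues that many parallel requests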
			foreach ( $urlChunks as $rows ) {
				// multi curl
				$rets = $this->_multi_request( $rows, $options );

				// check result headers
				foreach ( $rows as $row ) {
					if ( empty( $rets[ $row[ 'id' ] ] ) ) { // If already in blacklist, no curl happened, no corresponding record
						continue;
					}

					// check response
					if ( $rets[ $row[ 'id' ] ][ 'code' ] == 428 ) { // HTTP/1.1 428 Precondition Required (need to test)
						$this->_end_reason = 'crawler_disabled';
						Debug2::debug( '🐞 crawler_disabled' );
						return;
					}

					$status = $this->_status_parse( $rets[ $row[ 'id' ] ][ 'header' ], $rets[ $row[ 'id' ] ][ 'code' ] ); // B or H or M or N(nocache)
					$this->_map_status_list[ $status ][ $row[ 'id' ] ] = array(
						'url'	=> $row[ 'url' ],
						'code' 	=> $rets[ $row[ 'id' ] ][ 'code' ], // 201 or 200 or 404
					);
					if ( empty( $this->_summary[ 'crawler_stats' ][ $this->_summary[ 'curr_crawler' ] ][ $status ] ) ) {
						$this->_summary[ 'crawler_stats' ][ $this->_summary[ 'curr_crawler' ] ][ $status ] = 0;
					}
					$this->_summary[ 'crawler_stats' ][ $this->_summary[ 'curr_crawler' ] ][ $status ]++;
				}

				// update offset position
				$_time = time();
				$this->_summary[ 'last_pos' ] += $this->_cur_threads;
				$this->_summary[ 'last_count' ] = $this->_cur_threads;
				$this->_summary[ 'last_crawled' ] += $this->_cur_threads;
				$this->_summary[ 'last_update_time' ] = $_time;
				$this->_summary[ 'last_status' ] = 'updated position';

				// check duration
				if ( $this->_summary[ 'last_update_time' ] > $this->_max_run_time ) {
					$this->_end_reason = 'stopped_maxtime';
					Debug2::debug( '🐞 Terminated due to maxtime' );
					return;
					// return __('Stopped due to exceeding defined Maximum Run Time', 'litespeed-cache');
				}

				// Save meta & map status at least once every 10 seconds
				if ( $_time - $this->_summary[ 'meta_save_time' ] > 10 ) {
					$this->_map_status_list = $this->cls( 'Crawler_Map' )->save_map_status( $this->_map_status_list, $this->_summary[ 'curr_crawler' ] );
					self::save_summary();
				}

				// Check every 5 seconds whether the position needs to be reset
				if ( $_time > $this->_summary[ 'pos_reset_check' ] ) {
					$this->_summary[ 'pos_reset_check' ] = $_time + 5;
					if ( file_exists( $this->_resetfile ) && unlink( $this->_resetfile ) ) {
						Debug2::debug( '🐞 Terminated due to reset file' );

						$this->_summary[ 'last_pos' ] = 0;
						$this->_summary[ 'curr_crawler' ] = 0;
						$this->_summary[ 'crawler_stats' ][ $this->_summary[ 'curr_crawler' ] ] = array();
						// reset done status
						$this->_summary[ 'done' ] = 0;
						$this->_summary[ 'this_full_beginning_time' ] = 0;
						$this->_end_reason = 'stopped_reset';
						return;
						// return __('Stopped due to reset meta position', 'litespeed-cache');
					}
				}

				// check loads
				if ( $this->_summary[ 'last_update_time' ] - $this->_cur_thread_time > 60 ) {
					$this->_adjust_current_threads();
					if ( $this->_cur_threads == 0 ) {
						$this->_end_reason = 'stopped_highload';
						Debug2::debug( '🐞 Terminated due to highload' );
						return;
						// return __('Stopped due to load over limit', 'litespeed-cache');
					}
				}

				$this->_summary[ 'last_status' ] = 'sleeping ' . $this->_crawler_conf[ 'run_delay' ] . 'us';

				usleep( $this->_crawler_conf[ 'run_delay' ] );
			}
		}

		// All URLs are done for current crawler
		$this->_end_reason = 'end';
		$this->_summary[ 'crawler_stats' ][ $this->_summary[ 'curr_crawler' ] ][ 'W' ] = 0;
		Debug2::debug( '🐞 Crawler #' . $this->_summary['curr_crawler'] . ' touched end' );
	}

	/**
	 * Send multi curl requests
	 * Rows whose res flag for the current crawler is B or N are skipped, so no result is returned for them
	 *
	 * @since  1.1.0
	 * @access private
	 */
	private function _multi_request( $rows, $options ) {
		$mh = curl_multi_init();
		$curls = array();
		foreach ( $rows as $row ) {
			if ( substr( $row[ 'res' ], $this->_summary[ 'curr_crawler' ], 1 ) == 'B' ) {
				continue;
			}
			if ( substr( $row[ 'res' ], $this->_summary[ 'curr_crawler' ], 1 ) == 'N' ) {
				continue;
			}
			$curls[ $row[ 'id' ] ] = curl_init();

			// Append URL
			$url = $row[ 'url' ];
			if ( $this->conf( Base::O_CRAWLER_DROP_DOMAIN ) ) {
				$url = $this->_crawler_conf[ 'base' ] . $row[ 'url' ];
			}
			curl_setopt( $curls[ $row[ 'id' ] ], CURLOPT_URL, $url );
			Debug2::debug( '🐞 Crawling [url] ' . $url . ( $url == $row[ 'url' ] ? '' : ' [ori] ' . $row[ 'url' ] ) );

			curl_setopt_array( $curls[ $row[ 'id' ] ], $options );

			curl_multi_add_handle( $mh, $curls[ $row[ 'id' ] ] );
		}

		// execute curl
		if ( $curls ) {
			$still_running = null;
			do {
				curl_multi_exec( $mh, $still_running ); // $still_running = number of transfers still in progress
				if ( curl_multi_select( $mh ) == -1 ) {
					usleep( 1 );
				}
			} while ( $still_running > 0 );
		}

		// curl done
		$ret = array();
		foreach ( $rows as $row ) {
			if ( substr( $row[ 'res' ], $this->_summary[ 'curr_crawler' ], 1 ) == 'B' ) {
				continue;
			}
			if ( substr( $row[ 'res' ], $this->_summary[ 'curr_crawler' ], 1 ) == 'N' ) {
				continue;
			}

			$ch = $curls[ $row[ 'id' ] ];

			// Parse header
			$header_size = curl_getinfo( $ch, CURLINFO_HEADER_SIZE );
			$content = curl_multi_getcontent( $ch );
			$header = substr( $content, 0, $header_size );

			$ret[ $row[ 'id' ] ] = array(
				'header' => $header,
				'code'	=> curl_getinfo( $ch, CURLINFO_HTTP_CODE ),
			);

			curl_multi_remove_handle( $mh, $ch );
			curl_close( $ch );
		}
		curl_multi_close( $mh );

		return $ret;
	}

	/**
	 * Check returned curl header to find if cached or not
	 *
	 * @since  2.0
	 * @access private
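	 *
	 * Example (illustrative): a header containing "x-litespeed-cache: hit" returns 'H',
	 * "x-litespeed-cache: miss" returns 'M', an "X-Litespeed-Cache-Control: no-cache"
	 * header returns 'N', and a response with no recognized cache header returns 'B'.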
	 */
	private function _status_parse( $header, $code ) {
		if ( $code == 201 ) {
			return 'H';
		}

		if ( stripos( $header, 'X-Litespeed-Cache-Control: no-cache' ) !== false ) {
			return 'N'; // No-cache (excluded from future crawls like blacklisted URLs)
		}

		$_cache_headers = array(
			'x-litespeed-cache',
			'x-lsadc-cache',
			'x-qc-cache',
		);

		foreach ( $_cache_headers as $_header ) {
			if ( stripos( $header, $_header ) !== false ) {
				if ( stripos( $header, $_header . ': miss' ) !== false ) {
					return 'M'; // Miss
				}
				return 'H'; // Hit
			}
		}

		return 'B'; // Blacklist
	}

	/**
	 * Get curl_options
	 *
	 * @since  1.1.0
	 * @access private
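	 *
	 * Returns an option array for curl_setopt_array(). When $crawler_only is true,
	 * the lscache_runner UA prefix and the litespeed_hash cookie are added as well.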
	 */
	private function _get_curl_options( $crawler_only = false ) {
		$options = array(
			CURLOPT_RETURNTRANSFER => true,
			CURLOPT_HEADER => true,
			CURLOPT_CUSTOMREQUEST => 'GET',
			CURLOPT_FOLLOWLOCATION => false,
			CURLOPT_ENCODING => 'gzip',
			CURLOPT_CONNECTTIMEOUT => 10,
			CURLOPT_TIMEOUT => $this->conf( Base::O_CRAWLER_TIMEOUT ), // Larger timeout to avoid incorrect blacklist addition #900171
			CURLOPT_SSL_VERIFYHOST => 0,
			CURLOPT_SSL_VERIFYPEER => false,
			CURLOPT_NOBODY => false,
			CURLOPT_HTTPHEADER => $this->_crawler_conf[ 'headers' ],
		);
		$options[ CURLOPT_HTTPHEADER ][] = 'Cache-Control: max-age=0';

		/**
		 * Try to enable HTTP/2 connection (only available since PHP 7+)
		 * @since  1.9.1
		 * @since  2.2.7 Commented out due to causing a no-cache issue
		 * @since  2.9.1+ Fixed wrong usage of the CURL_HTTP_VERSION_1_1 const
		 */
		$options[ CURLOPT_HTTP_VERSION ] = CURL_HTTP_VERSION_1_1;
		// 	$options[ CURL_HTTP_VERSION_2 ] = 1;

		// IP resolve
		if ( $this->conf( Base::O_SERVER_IP ) ) {
			Utility::compatibility();
			if ( ( $this->conf( Base::O_CRAWLER_DROP_DOMAIN ) || ! $crawler_only ) && $this->_crawler_conf[ 'base' ] ) {
				// Resolve URL to IP
				$parsed_url = parse_url( $this->_crawler_conf[ 'base' ] );

				if ( ! empty( $parsed_url[ 'host' ] ) ) {
					$dom = $parsed_url[ 'host' ];
					$port = $parsed_url[ 'scheme' ] == 'https' ? '443' : '80';
					$url = $dom . ':' . $port . ':' . $this->conf( Base::O_SERVER_IP );

					$options[ CURLOPT_RESOLVE ] = array( $url );
					$options[ CURLOPT_DNS_USE_GLOBAL_CACHE ] = false;
				}
			}
		}

		// if is walker
		// $options[ CURLOPT_FRESH_CONNECT ] = true;

		// Referer
		if ( isset( $_SERVER[ 'HTTP_HOST' ] ) && isset( $_SERVER[ 'REQUEST_URI' ] ) ) {
			$options[ CURLOPT_REFERER ] = 'http://' . $_SERVER[ 'HTTP_HOST' ] . $_SERVER[ 'REQUEST_URI' ];
		}

		// User Agent
		if ( $crawler_only ) {
			if ( strpos( $this->_crawler_conf[ 'ua' ], Crawler::FAST_USER_AGENT ) !== 0 ) {
				$this->_crawler_conf[ 'ua' ] = Crawler::FAST_USER_AGENT . ' ' . $this->_crawler_conf[ 'ua' ];
			}
		}
		$options[ CURLOPT_USERAGENT ] = $this->_crawler_conf[ 'ua' ];

		/**
		 * Append hash to cookie for validation
		 * @since  1.9.1
		 */
		if ( $crawler_only ) {
			$this->_crawler_conf[ 'cookies' ][ 'litespeed_hash' ] = Router::get_hash();
		}

		// Cookies
		$cookies = array();
		foreach ( $this->_crawler_conf[ 'cookies' ] as $k => $v ) {
			if ( ! $v ) {
				continue;
			}
			$cookies[] = $k . '=' . urlencode( $v );
		}
		if ( $cookies ) {
			$options[ CURLOPT_COOKIE ] = implode( '; ', $cookies );
		}

		return $options;
	}

	/**
	 * Self curl to get HTML content
	 *
	 * @since  3.3
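	 *
	 * Example (illustrative): self_curl( home_url( '/' ), 'Mobile iPhone' ) fetches
	 * the homepage HTML using the mobile UA; pass a $uid to simulate that user's role.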
	 */
	public function self_curl( $url, $ua, $uid = false, $accept = false ) { // $accept not in use yet
		$this->_crawler_conf[ 'base' ] = home_url();
		$this->_crawler_conf[ 'ua' ] = $ua;
		if ( $accept ) {
			$this->_crawler_conf[ 'headers' ] = array( 'Accept: ' . $accept );
		}
		if ( $uid ) {
			$this->_crawler_conf[ 'cookies' ][ 'litespeed_role' ] = $uid;
			$this->_crawler_conf[ 'cookies' ][ 'litespeed_hash' ] = Router::get_hash();
		}

		$options = $this->_get_curl_options();
		$options[ CURLOPT_HEADER ] = false;
		$options[ CURLOPT_FOLLOWLOCATION ] = true;

		$ch = curl_init();
		curl_setopt_array( $ch, $options );
		curl_setopt( $ch, CURLOPT_URL, $url );
		$result = curl_exec( $ch );
		curl_close( $ch );

		return $result;
	}

	/**
	 * Terminate crawling
	 *
	 * @since  1.1.0
	 * @access private
	 */
	private function _terminate_running() {
		$this->_map_status_list = $this->cls( 'Crawler_Map' )->save_map_status( $this->_map_status_list, $this->_summary[ 'curr_crawler' ] );

		if ( $this->_end_reason == 'end' ) { // Current crawler is fully done
			// $end_reason = sprintf( __( 'Crawler %s reached end of sitemap file.', 'litespeed-cache' ), '#' . ( $this->_summary['curr_crawler'] + 1 ) );
			$this->_summary[ 'curr_crawler' ]++; // Jump to the next crawler
			// $this->_summary[ 'crawler_stats' ][ $this->_summary[ 'curr_crawler' ] ] = array(); // reset this at next crawl time
			$this->_summary[ 'last_pos' ] = 0;// reset last position
			$this->_summary[ 'last_crawler_total_cost' ] = time() - $this->_summary[ 'curr_crawler_beginning_time' ];
			$count_crawlers = count( $this->list_crawlers() );
			if ( $this->_summary[ 'curr_crawler' ] >= $count_crawlers ) {
				Debug2::debug( '🐞 _terminate_running Touched end, whole crawled. Reload crawler!' );
				$this->_summary[ 'curr_crawler' ] = 0;
				// $this->_summary[ 'crawler_stats' ][ $this->_summary[ 'curr_crawler' ] ] = array();
				$this->_summary[ 'done' ] = 'touchedEnd';// log done status
				$this->_summary[ 'last_full_time_cost' ] = time() - $this->_summary[ 'this_full_beginning_time' ];
			}
		}
		$this->_summary[ 'last_status' ] = 'stopped';
		$this->_summary[ 'is_running' ] = 0;
		$this->_summary[ 'end_reason' ] = $this->_end_reason;
		self::save_summary();
	}

	/**
	 * List all crawlers ( tagA => [ valueA => titleA, ... ] ...)
	 *
	 * @since    1.9.1
	 * @access   public
	 */
	public function list_crawlers() {
		if ( $this->_crawlers ) {
			return $this->_crawlers;
		}

		$crawler_factors = array();

		// Add default Guest crawler
		$crawler_factors[ 'uid' ] = array( 0 => __( 'Guest', 'litespeed-cache' ) );

		// WebP on/off
		if ( $this->conf( Base::O_IMG_OPTM_WEBP_REPLACE ) ) {
			$crawler_factors[ 'webp' ] = array( 1 => 'WebP', 0 => '' );
		}

		// Guest Mode on/off
		if ( $this->conf( Base::O_GUEST ) ) {
			$vary_name = $this->cls( 'Vary' )->get_vary_name();
			$vary_val = 'guest_mode:1';
			if ( ! defined( 'LSCWP_LOG' ) ) {
				$vary_val = md5( $this->conf( Base::HASH ) . $vary_val );
			}
			$crawler_factors[ 'cookie:' . $vary_name ] = array( $vary_val => '', '_null' => '<font data-balloon-pos="up" aria-label="Guest Mode">👒</font>' );
		}

		// Mobile crawler
		if ( $this->conf( Base::O_CACHE_MOBILE ) ) {
			$crawler_factors[ 'mobile' ] = array( 1 => '<font data-balloon-pos="up" aria-label="Mobile">📱</font>', 0 => '' );
		}

		// Get roles set
		// List all roles
		foreach ( $this->conf( Base::O_CRAWLER_ROLES ) as $v ) {
			$role_title = '';
			$udata = get_userdata( $v );
			if ( isset( $udata->roles ) && is_array( $udata->roles ) ) {
				$tmp = array_values( $udata->roles );
				$role_title = array_shift( $tmp );
			}
			if ( ! $role_title ) {
				continue;
			}

			$crawler_factors[ 'uid' ][ $v ] = ucfirst( $role_title );
		}

		// Cookie crawler
		foreach ( $this->conf( Base::O_CRAWLER_COOKIES ) as $v ) {
			if ( empty( $v[ 'name' ] ) ) {
				continue;
			}

			$this_cookie_key = 'cookie:' . $v[ 'name' ];

			$crawler_factors[ $this_cookie_key ] = array();

			foreach ( $v[ 'vals' ] as $v2 ) {
				$crawler_factors[ $this_cookie_key ][ $v2 ] = $v2 == '_null' ? '' : '<font data-balloon-pos="up" aria-label="Cookie">🍪</font>' . esc_html( $v[ 'name' ] ) . '=' . esc_html( $v2 );
			}
		}

		// Cross-combine all factors to generate the crawler list
		$this->_crawlers = $this->_recursive_build_crawler( $crawler_factors );

		return $this->_crawlers;
	}

	/**
	 * Build a crawler list recursively
	 *
	 * @since 2.8
	 * @access private
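	 *
	 * Example (illustrative): factors { uid: {0, 7}, mobile: {1, 0} } expand to the
	 * Cartesian product of all factor values, i.e. 4 crawler definitions.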
	 */
	private function _recursive_build_crawler( $crawler_factors, $group = array(), $i = 0 ) {
		$current_factor = array_keys( $crawler_factors );
		$current_factor = $current_factor[ $i ];

		$if_touch_end = $i + 1 >= count( $crawler_factors );

		$final_list = array();

		foreach ( $crawler_factors[ $current_factor ] as $k => $v ) {

			// Don't alter $group because it is reused across loop iterations
			$item = $group;
			$item[ 'title' ] = ! empty( $group[ 'title' ] ) ? $group[ 'title' ] : '';
			if ( $v ) {
				if ( $item[ 'title' ] ) {
					$item[ 'title' ] .= ' - ';
				}
				$item[ 'title' ] .= $v;
			}
			$item[ $current_factor ] = $k;

			if ( $if_touch_end ) {
				$final_list[] = $item;
			}
			else {
				// Inception: next layer
				$final_list = array_merge( $final_list, $this->_recursive_build_crawler( $crawler_factors, $item, $i + 1 ) );
			}

		}

		return $final_list;
	}

	/**
	 * Return crawler meta file
	 *
	 * @since    1.1.0
	 * @access public
	 */
	public function json_path() {
		if ( ! file_exists( LITESPEED_STATIC_DIR . '/crawler/' . $this->_sitemeta ) ) {
			return false;
		}

		return LITESPEED_STATIC_URL . '/crawler/' . $this->_sitemeta;
	}


	/**
	 * Create reset pos file
	 *
	 * @since    1.1.0
	 * @access public
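	 *
	 * The running crawler checks for this file roughly every 5 seconds in
	 * _do_running() and resets its position once it finds (and removes) it.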
	 */
	public function reset_pos() {
		File::save( $this->_resetfile, time() , true );

		$this->_summary[ 'is_running' ] = 0;
		self::save_summary();
	}

	/**
	 * Display status dots matching the crawler order
	 *
	 * @since  3.0
	 * @access public
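	 *
	 * Example (illustrative): $status_row = 'HMBN' with $reason_set = ',,Man,'
	 * renders four numbered dots; the third one carries the "Manually added to
	 * blocklist" tooltip.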
	 */
	public function display_status( $status_row, $reason_set ) {
		if ( ! $status_row ) {
			return '';
		}

		$_status_list = array(
			'-' => 'default',
			'M' => 'primary',
			'H' => 'success',
			'B' => 'danger',
			'N' => 'warning',
		);

		$reason_set = explode( ',', $reason_set );

		$status = '';
		foreach ( str_split( $status_row ) as $k => $v ) {
			$reason = $reason_set[ $k ];
			if ( $reason == 'Man' ) {
				$reason = __( 'Manually added to blocklist', 'litespeed-cache' );
			}
			if ( $reason == 'Existed' ) {
				$reason = __( 'Previously existed in blocklist', 'litespeed-cache' );
			}
			if ( $reason ) {
				$reason = 'data-balloon-pos="up" aria-label="' . $reason . '"';
			}
			$status .= '<i class="litespeed-dot litespeed-bg-' . $_status_list[ $v ] . '" ' . $reason . '>' . ( $k + 1 ) . '</i>';
		}

		return $status;
	}

	/**
	 * Output info
	 *
	 * @since    1.1.0
	 * @access protected
	 * @param  string $msg Message to output
	 */
	protected function output($msg) {
		if ( defined('DOING_CRON') ) {
			echo $msg;
			// exit();
		}
		else {
			echo "<script>alert('" . htmlspecialchars($msg) . "');</script>";
			// exit;
		}
	}

	/**
	 * Handle all request actions from main cls
	 *
	 * @since  3.0
	 * @access public
	 */
	public function handler() {
		$type = Router::verify_type();

		switch ( $type ) {
			case self::TYPE_REFRESH_MAP:
				$this->cls( 'Crawler_Map' )->gen();
				break;

			case self::TYPE_EMPTY:
				$this->cls( 'Crawler_Map' )->empty_map();
				break;

			case self::TYPE_BLACKLIST_EMPTY:
				$this->cls( 'Crawler_Map' )->blacklist_empty();
				break;

			case self::TYPE_BLACKLIST_DEL:
				if ( ! empty( $_GET[ 'id' ] ) ) {
					$this->cls( 'Crawler_Map' )->blacklist_del( $_GET[ 'id' ] );
				}
				break;

			case self::TYPE_BLACKLIST_ADD:
				if ( ! empty( $_GET[ 'id' ] ) ) {
					$this->cls( 'Crawler_Map' )->blacklist_add( $_GET[ 'id' ] );
				}
				break;

			// Handle the ajax request to proceed crawler manually by admin
			case self::TYPE_START:
				self::start( true );
				break;

			case self::TYPE_RESET:
				$this->reset_pos();
				break;

			default:
				break;
		}

		Admin::redirect();
	}

}