<?php
/*========================================================================*\
|| ###################################################################### ||
|| # vBulletin 5.1.9 Patch Level 4 - Licence Number LD125EAAF9
|| # ------------------------------------------------------------------ # ||
|| # Copyright 2000-2016 vBulletin Solutions Inc. All Rights Reserved.  # ||
|| # This file may not be redistributed in whole or significant part.   # ||

|| # With great thanks to the contribution provided by Andreas          # ||
|| # for the development of this script.                                # ||
|| # ----------------- VBULLETIN IS NOT FREE SOFTWARE ----------------- # ||
|| # http://www.vbulletin.com | http://www.vbulletin.com/license.html   # ||
|| ###################################################################### ||
\*========================================================================*/

// ######################## SET PHP ENVIRONMENT ###########################
// for some reason adding the @ to the beginning of this line makes it not work
error_reporting(E_ALL & ~E_NOTICE);

if (!function_exists('readline'))
{
	/**
	 * Minimal stand-in for readline() on PHP builds without the readline extension.
	 *
	 * Prints the prompt and reads one line from STDIN.
	 *
	 * @param string $prompt Text printed before waiting for input.
	 * @return string|false The line without its trailing line break, or false when
	 *                      nothing more can be read (same as the native readline()).
	 */
	function readline( $prompt = '' )
	{
		echo $prompt;

		$line = fgets( STDIN );
		if ($line === false)
		{
			// end of input or read error: report it instead of pretending an empty line was typed
			return false;
		}

		// strip "\r" too, otherwise answers typed/piped with CRLF line endings keep a trailing "\r"
		return rtrim( $line, "\r\n" );
	}
}

/**
 * Formats a duration for the progress output, e.g. "x hours, y minutes and z seconds".
 *
 * The three duration phrases are fetched through the phrase API on first use and kept in
 * static variables. As long as any of them is still empty the fetch is retried on the next call.
 *
 * @param int|float $seconds Duration in seconds. Fractions are dropped, negative values count as 0.
 * @return mixed The result of vB_Phrase::parsePhrase() for the phrase matching the magnitude:
 *               seconds only, minutes + seconds, or hours + minutes + seconds.
 */
function fetch_postindex_exec_time($seconds)
{
	static $ph_hours = '', $ph_minutes = '', $ph_seconds = '';
	// all three cached phrases are checked here (not the $seconds argument), so a
	// zero duration no longer forces a new phrase lookup on every call
	if (empty($ph_hours) OR empty($ph_minutes) OR empty($ph_seconds))
	{
		try
		{
			$vbphrase = vB_Api::instanceInternal('phrase')->fetch(array('x_seconds', 'x_minutes_and_y_seconds', 'x_hours_y_minutes_and_z_seconds'));
		}
		catch (Exception $e)
		{
			$vbphrase['x_hours_y_minutes_and_z_seconds'] = $vbphrase['x_minutes_and_y_seconds'] = $vbphrase['x_seconds'] = '';
		}

		$ph_hours = $vbphrase['x_hours_y_minutes_and_z_seconds'];
		$ph_minutes = $vbphrase['x_minutes_and_y_seconds'];
		$ph_seconds = $vbphrase['x_seconds'];
	}

	// callers pass microtime() differences and computed ETAs: work on whole, non negative
	// seconds so the modulo below never receives a fractional or negative operand
	$seconds = max(0, (int) $seconds);

	$d = array();
	$d['h'] = (int) floor($seconds / 3600);
	$d['m'] = (int) floor(($seconds - ($d['h'] * 3600)) / 60);
	$d['s'] = $seconds % 60;

	// start with the seconds-only phrase and widen it while larger units are present
	$phrase = $ph_seconds;
	$params = array($d['s']);
	if (!empty($d['h']) OR !empty($d['m']))
	{
		$phrase = $ph_minutes;
		array_unshift($params, $d['m']);
	}
	if (!empty($d['h']))
	{
		$phrase = $ph_hours;
		array_unshift($params, $d['h']);
	}
	return vB_Phrase::parsePhrase($phrase, $params);
}

/**
 * Invalidates the cached data of one node after it has been (re)indexed.
 *
 * Fires the 'nodeChg_<id>' cache event and then purges the node's lvl1, lvl4 and lvl3
 * entries (in that order) from the standard cache.
 *
 * @param mixed $nodeId Id of the node; it is concatenated into the cache keys.
 */
function clearcache($nodeId)
{
	vB_Cache::allCacheEvent('nodeChg_' . $nodeId);

	$keyPrefix = 'node_' . $nodeId;
	foreach (array('_lvl1data', '_lvl4data', '_lvl3data') as $keySuffix)
	{
		vB_Cache::instance(vB_Cache::CACHE_STD)->purge($keyPrefix . $keySuffix);
	}
}


// this file will most likely be run from the forum root, so ./core is offered as the default
$def_core = realpath('core');
$forumspath = '';
// the messages below are hard coded: phrases can't be fetched through the phrase API
// before we know the core path.
while (true)
{
	$answer = readline("Please enter the path to your vBulletin directory (default $def_core): ");
	$forumspath = trim($answer);
	if (empty($forumspath))
	{
		// nothing typed: fall back to the default (false when ./core could not be resolved)
		$forumspath = $def_core;
	}

	if (!empty($forumspath) AND is_dir($forumspath))
	{
		break;
	}

	// without further input (closed or fully consumed stdin) asking again would loop forever
	if ($answer === false OR (defined('STDIN') AND feof(STDIN)))
	{
		print ("\nNo valid vBulletin directory was given, aborting\n");
		exit(1);
	}

	if (empty($forumspath))
	{
		print ("\nThe default directory does not exist, please enter a path\n");
	}
	else
	{
		print ("\n$forumspath is not a valid directory, please try again\n");
	}
}
// ##################### DEFINE IMPORTANT CONSTANTS #######################
// These are defined before global.php is included below.
// NOTE(review): presumably the bootstrap reads them to identify this script and to
// skip session/cookie handling -- confirm against global.php.
define('THIS_SCRIPT', 'searchindex');
define('VB_AREA', 'Maintenance');
define('SKIP_SESSIONCREATE', 1);
define('VB_ENTRY', true);
define('NOCOOKIES', 1);

// make the chosen directory the working directory so the relative require below resolves inside it
chdir($forumspath);

// ########################################################################
// ######################### START MAIN SCRIPT ############################
// ########################################################################

require_once('./global.php');
// lift the execution time limit for this long running job; @ hides the warning when the call is not permitted
@set_time_limit(0);
// expose the 'version' field of the 'mysqlVersion' query row as a constant
$mysqlversion = vB::getDbAssertor()->getRow('mysqlVersion');
define('MYSQL_VERSION', $mysqlversion['version']);

// every phrase the rest of the script prints, fetched with a single API call
$phrasenames = array(
	'note_reindexing_empty_indexes_x', 'building_search_index', 'default', 'empty_the_index', 'fetching_x_nodes', 'reindexing_all', 'calculating_total',
	'indexing_x_nodes', 'indexing_from_x_to_y_in_z', 'indexing_x_nodes_took_y', 'fetching_parent_nodes_for_attachments', 'indexing_parent_nodes',
	'indexing_parents_took_x', 'indexing_rest_of_contenttypes', 'indexing_from_x_to_y', 'indexing_failed_for_x', 'indexing_x_nodes_y_percent_speed_z_eta',
	'rebuilt_search_index_not_implemented'
);

try
{
	$vbphrase = vB_Api::instanceInternal('phrase')->fetch($phrasenames);
}
catch (Exception $e)
{
	// best effort: without phrases the script still runs, it just prints blank texts
}

if (!isset($vbphrase))
{
	$vbphrase = array();
}
if (is_array($vbphrase))
{
	// phrases that could not be loaded become empty strings, so the lookups further
	// down never hit an undefined index
	$vbphrase += array_fill_keys($phrasenames, '');
}

echo(strip_tags($vbphrase['note_reindexing_empty_indexes_x']) . "\n");

// 'empty_index' is not part of the list fetched above, so it is only set when something
// else loaded it; otherwise ask with 'empty_the_index' instead of showing a blank question
$emptyprompt = !empty($vbphrase['empty_index']) ? $vbphrase['empty_index'] : $vbphrase['empty_the_index'];
$emptyindex = intval(readline($emptyprompt . ' [0/1,' . $vbphrase['default'] . '=0]: '));

echo "\n";
$searchAPI = vB_Api::instanceInternal('search');
$start = time();
$perpage = 1000;

if (!$emptyindex)
{
	// the index is kept: the text passes below read from the "EmptyCRC32" queries
	$table = 'vBDBSearch:textToIndexEmptyCRC32';
	$table_count = 'vBDBSearch:textToIndexEmptyCRC32Count';
}
else
{
	echo $vbphrase['reindexing_all'] . "\n";
	$startbatch = time();

	// the search implementation may bring its own full reindex (sphinx does);
	// when it reports success there is nothing left to do here
	try
	{
		if ($searchAPI->reIndexAll())
		{
			echo $vbphrase['building_search_index'] . ': ' . fetch_postindex_exec_time(time() - $start) . "\n";
			exit;
		}
	}
	catch (Exception $e)
	{
		$phrase = vB_Api::instanceInternal('phrase')->fetch(array('rebuilt_search_index_not_implemented'));
		echo $phrase['rebuilt_search_index_not_implemented'];
	}

	// no full reindex happened: empty the index and rebuild it with the passes below
	echo $vbphrase['empty_the_index'] . '...';
	$searchAPI->emptyIndex();
	$endbatch = time();
	echo fetch_postindex_exec_time($endbatch - $startbatch) . "\n";

	$table = 'vBDBSearch:textToIndex';
	$table_count = 'vBDBSearch:textToIndexCount';
}

$params = array('contenttypeid' => vB_Api::instanceInternal('ContentType')->fetchContentTypeIdFromClass('Text'));

echo($vbphrase['calculating_total'] . "\n");
$total = vB::getDbAssertor()->getField($table_count, $params);

echo("\n" . vB_Phrase::parsePhrase($vbphrase['indexing_x_nodes'], array('Text')) . "\n");

$startText = time();
$offset = 0;
$endbatch = 0;
$failed = array();

$params[vB_Db_Query::PARAM_LIMIT] = $perpage;
// first we index the text nodes with a direct approach, skipping a few API calls

do
{
	$count = 0;
	$passed = 0;
	$startbatch = time();

	// each batch starts right after the nodes that failed so far.
	// NOTE(review): this relies on successfully indexed nodes no longer being returned by
	// $table on the next query -- confirm that this also holds for 'vBDBSearch:textToIndex'.
	$params[vB_Db_Query::PARAM_LIMITSTART] = count($failed);
	$textNodes = vB::getDbAssertor()->assertQuery($table, $params);
	foreach ($textNodes as $node)
	{
		try
		{
			$searchAPI->indexText($node, $node['title'], $node['rawtext'], true);
			clearcache($node['nodeid']);
			$passed ++;
		}
		catch (Exception $e)
		{
			// remember the node and carry on; the ids are reported after the loop
			$failed[] = $node['nodeid'];
		}

		$count++;
	}

	$offset += $count;
	$endbatch = time();
	$spent = $endbatch - $startbatch;
	$percent = $total == 0 ? 100 : round(100 * $offset/$total);
	$speed = $spent == 0 ? 0 : round($count / $spent);
	// more nodes than initially counted may have been processed; never report a negative ETA
	$eta = $speed == 0 ? 0 : max(0, $total - $offset) / $speed;

	echo(vB_Phrase::parsePhrase($vbphrase['indexing_x_nodes_y_percent_speed_z_eta'], array(
			'Text',
			"$offset / $total ($spent s) " . $percent,
			$speed,
			fetch_postindex_exec_time($eta)
		)) . "\n");

	vB_Cache::resetCache();

}
// stop when a batch indexed nothing (only failures or no rows left) or the counted total is reached
while (($passed > 0) AND ($offset < $total));

if (!empty($failed))
{
	echo(vB_Phrase::parsePhrase($vbphrase['indexing_failed_for_x'], array(implode(',', $failed))) . "\n");
	$failed = array();
}

// measured from $startText so the figure covers this pass only, not the time
// spent earlier on the full reindex attempt / emptying the index
echo(vB_Phrase::parsePhrase($vbphrase['indexing_x_nodes_took_y'], array('Text', fetch_postindex_exec_time(microtime(true)-$startText))) . "\n");

unset($textNodes);

// channel pass: all channel nodes come from a single query and go straight to
// indexText(), skipping a few API calls
$startchannel = microtime(true);
$params = array(
	'contenttypeid' => vB_Api::instanceInternal('ContentType')->fetchContentTypeIdFromClass('Channel')
);
$startbatch = time();
echo vB_Phrase::parsePhrase($vbphrase['fetching_x_nodes'], array('Channel')) . '...';

$channelNodes = vB::getDbAssertor()->assertQuery('vBForum:node', $params);
echo fetch_postindex_exec_time(microtime(true) - $startbatch) . "\n";
echo vB_Phrase::parsePhrase($vbphrase['indexing_x_nodes'], array('Channel')) . "...\n";
$startbatch = time();

for ($count = 1, $channelNodes->rewind(); false; )
{
}
$count = 1;
foreach ($channelNodes as $node)
{
	// every $perpage nodes: print a progress line plus the ids that failed since the last one
	if ($count % $perpage == 0)
	{
		echo vB_Phrase::parsePhrase(
			$vbphrase['indexing_from_x_to_y_in_z'],
			array($count - $perpage, $count, fetch_postindex_exec_time(time() - $startbatch))
		) . "\n";
		$startbatch = time();

		if (!empty($failed))
		{
			echo vB_Phrase::parsePhrase($vbphrase['indexing_failed_for_x'], array(implode(',', $failed))) . "\n";
			$failed = array();
		}
	}

	try
	{
		$searchAPI->indexText($node, $node['title'], $node['rawtext'], true);
		clearcache($node['nodeid']);
	}
	catch (Exception $e)
	{
		// keep going; the id is reported with the next progress line or after the loop
		$failed[] = $node['nodeid'];
	}

	++$count;
}

// failures collected since the last progress line
if (!empty($failed))
{
	echo vB_Phrase::parsePhrase($vbphrase['indexing_failed_for_x'], array(implode(',', $failed))) . "\n";
	$failed = array();
}

echo vB_Phrase::parsePhrase(
	$vbphrase['indexing_x_nodes_took_y'],
	array('Channel', fetch_postindex_exec_time(microtime(true) - $startchannel))
) . "\n";
unset($channelNodes);


// index the attach nodes with a direct approach, skipping a few API calls
$params = array('contenttypeid' => vB_Api::instanceInternal('ContentType')->fetchContentTypeIdFromClass('Attach'));
$count = 1;
$startbatch = $startAttachAll = time();
echo(vB_Phrase::parsePhrase($vbphrase['fetching_x_nodes'], array('Attach')) . '...');
$attachNodes = vB::getDbAssertor()->assertQuery('vBDBSearch:fetchAttachments', $params);
echo (fetch_postindex_exec_time(microtime(true)-$startbatch) . "\n");
echo(vB_Phrase::parsePhrase($vbphrase['indexing_x_nodes'], array('Attach')) . "...\n");
$startbatch = time();
// nodeid => attachment text (caption + filename) that is appended to that node's text further down
$parents = array();
foreach ($attachNodes as $node)
{
	// every $perpage nodes: print a progress line plus the ids that failed since the last one
	if (!($count % $perpage))
	{
		echo(vB_Phrase::parsePhrase($vbphrase['indexing_from_x_to_y_in_z'], array($count - $perpage, $count, fetch_postindex_exec_time(time()-$startbatch))) . "\n");
		$startbatch = time();
		if (!empty($failed))
		{
			echo(vB_Phrase::parsePhrase($vbphrase['indexing_failed_for_x'], array(implode(',', $failed))) . "\n");
			$failed = array();
		}
	}
	$text = trim($node['caption'] . ' ' . $node['filename']);
	if (!empty($text))
	{
		// NOTE(review): the key is the nodeid of the fetched row, and the same keys are used
		// below to look up the "parent" nodes -- confirm that 'vBDBSearch:fetchAttachments'
		// returns the id that is intended here.
		if (empty($parents[$node['nodeid']]))
		{
			$parents[$node['nodeid']] = $text;
		}
		else
		{
			// separate the texts, otherwise the last word of one attachment and the
			// first word of the next are glued into a single term
			$parents[$node['nodeid']] .= ' ' . $text;
		}
	}
	try
	{
		$searchAPI->indexText($node, $text, $node['parenttitle'], true);
		clearcache($node['nodeid']);
	}
	catch (Exception $e)
	{
		$failed[] = $node['nodeid'];
	}

	$count++;
}

if (!empty($failed))
{
	echo(vB_Phrase::parsePhrase($vbphrase['indexing_failed_for_x'], array(implode(',', $failed))) . "\n");
	$failed = array();
}
unset($attachNodes);
// need to index the nodes the attachments belong to
$count = 1;
$startbatch = time();
echo($vbphrase['fetching_parent_nodes_for_attachments']);
if (empty($parents))
{
	// no attachment text was collected: do not run a node query with an empty id list
	$textNodes = array();
}
else
{
	$textNodes = vB::getDbAssertor()->assertQuery('vBForum:node', array('nodeid' => array_keys($parents)));
}
echo (fetch_postindex_exec_time(microtime(true)-$startbatch) . "\n");
echo($vbphrase['indexing_parent_nodes'] . "\n");
$startbatch = $startAttach = time();
foreach ($textNodes as $node)
{
	if (!($count % $perpage))
	{
		echo(vB_Phrase::parsePhrase($vbphrase['indexing_from_x_to_y_in_z'], array($count - $perpage, $count, fetch_postindex_exec_time(time()-$startbatch))) . "\n");
		$startbatch = time();
		if (!empty($failed))
		{
			echo(vB_Phrase::parsePhrase($vbphrase['indexing_failed_for_x'], array(implode(',', $failed))) . "\n");
			$failed = array();
		}
	}
	try
	{
		// the node's own text plus the text of its attachments
		$searchAPI->indexText($node, $node['title'], $node['rawtext'] . ' ' . $parents[$node['nodeid']], true);
		clearcache($node['nodeid']);
	}
	catch (Exception $e)
	{
		$failed[] = $node['nodeid'];
	}

	$count++;
}

if (!empty($failed))
{
	echo(vB_Phrase::parsePhrase($vbphrase['indexing_failed_for_x'], array(implode(',', $failed))) . "\n");
	$failed = array();
}

echo(vB_Phrase::parsePhrase($vbphrase['indexing_parents_took_x'], array(fetch_postindex_exec_time(microtime(true)-$startAttach))) . "\n");
unset($textNodes);

// total for the whole attachment phase (attachments and their parent nodes)
echo(vB_Phrase::parsePhrase($vbphrase['indexing_x_nodes_took_y'], array('Attach', fetch_postindex_exec_time(microtime(true)-$startAttachAll))) . "\n");


$params = array('contenttypeid' => vB_Api::instanceInternal('ContentType')->fetchContentTypeIdFromClass('PrivateMessage'));
// private messages are indexed in batches with a direct approach, like the text nodes
$startPM = time();
$offset = 0;
$endbatch = 0;
$failed = array();

echo($vbphrase['calculating_total'] . "\n");
$total = vB::getDbAssertor()->getField($table_count, $params);

echo(vB_Phrase::parsePhrase($vbphrase['indexing_x_nodes'], array('PrivateMessage')) . "\n");

$params[vB_Db_Query::PARAM_LIMIT] = $perpage;

do
{
	$count = 0;
	$startbatch = time();
	$passed = 0;
	// each batch starts right after the nodes that failed so far.
	// NOTE(review): this relies on successfully indexed nodes no longer being returned by
	// $table on the next query -- confirm that this also holds for 'vBDBSearch:textToIndex'.
	$params[vB_Db_Query::PARAM_LIMITSTART] = count($failed);

	$PMs = vB::getDbAssertor()->assertQuery($table, $params);
	foreach ($PMs as $node)
	{
		try
		{
			$searchAPI->indexText($node, $node['title'], $node['rawtext'], true);
			clearcache($node['nodeid']);
			$passed ++;
		}
		catch (Exception $e)
		{
			// remember the node and carry on; the ids are reported after the loop
			$failed[] = $node['nodeid'];
		}

		$count++;
	}
	$offset += $count;
	$endbatch = time();
	$spent = $endbatch - $startbatch;
	$speed = $spent == 0 ? 0 : round($count / $spent);
	// more nodes than initially counted may have been processed; never report a negative ETA
	$eta = $speed == 0 ? 0 : max(0, $total - $offset) / $speed;
	$percent = $total == 0 ? 100 : round(100 * $offset/$total);
	echo(vB_Phrase::parsePhrase($vbphrase['indexing_x_nodes_y_percent_speed_z_eta'], array(
			'PrivateMessage',
			"$offset / $total ($spent s) " . $percent,
			$speed,
			fetch_postindex_exec_time($eta)
	)) . "\n");

	vB_Cache::resetCache();

}
// stop when a batch indexed nothing (only failures or no rows left) or the counted total is reached
while (($passed > 0) AND ($offset < $total));

if (!empty($failed))
{
	echo(vB_Phrase::parsePhrase($vbphrase['indexing_failed_for_x'], array(implode(',', $failed))) . "\n");
	$failed = array();
}

echo(vB_Phrase::parsePhrase($vbphrase['indexing_x_nodes_took_y'], array('PrivateMessage', fetch_postindex_exec_time(microtime(true)-$startPM))) . "\n");
// release the last result set ($PMs is the variable that actually holds it)
unset($PMs);

// the remaining contenttypes will be indexed using the regular index() call.
// we will only index the nodes that have not been indexed before: not of type Text
// and with an empty CRC32 field
$params = $count_params = array(
		vB_Db_Query::PARAM_LIMIT => $perpage,
		vB_dB_Query::CONDITIONS_KEY => array(
							array('field' => 'contenttypeid', 'value' => vB_Api::instanceInternal('ContentType')->fetchContentTypeIdFromClass('Text'), 'operator' => vB_dB_Query::OPERATOR_NE),
							array('field' => 'CRC32', 'value' => '', 'operator' => vB_dB_Query::OPERATOR_EQ)
						)
		);
// same conditions, run as a count query to get the total for the progress output
$count_params[vB_dB_Query::TYPE_KEY] = vB_dB_Query::QUERY_COUNT;
echo($vbphrase['calculating_total'] . "\n");
$total = vB::getDbAssertor()->getField('vBForum:node', $count_params);
echo(vB_Phrase::parsePhrase($vbphrase['indexing_x_nodes'], array('Other')) . "\n");

$startOther = time();
$offset = 0;
$failed = array();

echo($vbphrase['indexing_rest_of_contenttypes'] . "\n");
do
{
	$count = 0;
	$startbatch = time();
	$passed = 0;

	// each batch starts right after the nodes that failed so far.
	// NOTE(review): this relies on indexed nodes dropping out of the CRC32 = '' condition -- confirm.
	$params[vB_Db_Query::PARAM_LIMITSTART] = count($failed);

	$otherNodes = vB::getDbAssertor()->assertQuery('vBForum:node', $params);
	foreach ($otherNodes as $node)
	{
		try
		{
			$searchAPI->index($node, 0);
			clearcache($node['nodeid']);
			$passed ++;
		}
		catch (Exception $e)
		{
			// remember the node and carry on; the ids are reported after the loop
			$failed[] = $node['nodeid'];
		}

		$count++;
	}

	$offset += $count;
	$endbatch = time();
	$spent = $endbatch - $startbatch;
	$percent = $total == 0 ? 100 : round(100 * $offset/$total);
	$speed = $spent == 0 ? 0 : round($count / $spent);
	// more nodes than initially counted may have been processed; never report a negative ETA
	$eta = $speed == 0 ? 0 : max(0, $total - $offset) / $speed;
	echo(vB_Phrase::parsePhrase($vbphrase['indexing_x_nodes_y_percent_speed_z_eta'], array(
			'Other',
			"$offset / $total ($spent s) " . $percent,
			$speed,
			fetch_postindex_exec_time($eta)
	)) . "\n");

	vB_Cache::resetCache();

}
// stop when a batch indexed nothing (only failures or no rows left) or the counted total is reached
while (($passed > 0) AND ($offset < $total));

if (!empty($failed))
{
	echo(vB_Phrase::parsePhrase($vbphrase['indexing_failed_for_x'], array(implode(',', $failed))) . "\n");
	$failed = array();
}

echo(vB_Phrase::parsePhrase($vbphrase['indexing_x_nodes_took_y'], array('Other', fetch_postindex_exec_time(microtime(true)-$startOther))) . "\n");
unset($otherNodes);

// overall duration, measured from the start of the (re)indexing
$end = microtime(true);
echo($vbphrase['building_search_index'] . ': ' . fetch_postindex_exec_time($end-$start) . "\n");

/*======================================================================*\
|| ####################################################################
|| # Downloaded: 05:43, Thu May 26th 2016
|| # CVS: $RCSfile$ - $Revision: 83435 $
|| ####################################################################
\*======================================================================*/
