<?php
// Direct-access guard: stop immediately unless the _JEXEC constant has been
// defined by whatever loaded this file (presumably the Joomla bootstrap).
defined( '_JEXEC' ) or die( 'Restricted access' );

/**
 * Extracts the HTML table surrounding the editor caret and converts it into
 * delimiter-separated text.
 *
 * Input is read from $_REQUEST ('editor_content', 'caret_position'); the
 * result (or an error message) is emitted as JSON through jexit().
 */
class TableExtraction {
	/** @var string|null Editor HTML with CRLF line endings normalised to LF. */
	var $contents = null;
	/** @var int|null Caret offset (in characters) inside $contents. */
	var $caret_position = null;
	
	/**
	 * Reads and normalises the request input.
	 *
	 * @param string|null $error_msg Receives a translated message when the input is unusable.
	 * @return bool True when both the content and a non-zero caret position are available.
	 */
	function getInput(&$error_msg = null) {
		if (isset($_REQUEST['editor_content']) && is_string($_REQUEST['editor_content'])) {
			// Normalise line endings unconditionally; str_replace is a no-op when
			// there is nothing to replace and it also handles a CRLF at offset 0.
			$this->contents = str_replace("\r\n", "\n", $_REQUEST['editor_content']);
		}
		if (isset($_REQUEST['caret_position']) && is_scalar($_REQUEST['caret_position'])) {
			$this->caret_position = intval($_REQUEST['caret_position']);
		}
		
		// A caret at offset 0 can never be inside a table, so empty() is adequate here.
		if (empty($this->contents) || empty($this->caret_position)) {
			$error_msg = JText::_('COM_TABULIZER_INVALID_TABLE_SELECTION');
			return false;
		}
		return true;
	}
	
	/**
	 * Checks that the PHP extensions used by the extraction are available.
	 *
	 * @param string|null $error_msg Receives a translated message naming the missing function.
	 * @return bool True when all required functions exist.
	 */
	function verifyPHPLibraries(&$error_msg) {
		if (!function_exists('libxml_use_internal_errors')) {
			$error_msg = sprintf(JText::_('COM_TABULIZER_XML_PARSER_MISSING'), 'libxml_use_internal_errors');
			return false;
		}
		if (!function_exists('mb_internal_encoding')) {
			$error_msg = sprintf(JText::_('COM_TABULIZER_MBSTRING_MISSING'),'mb_internal_encoding');
			return false;
		}
		return true;
	}
	
	/**
	 * Entry point: validates the environment and input, extracts the table and
	 * emits the JSON response (outputData() ends by calling jexit()).
	 */
	function process() {
		$error_msg = null;
		$table_text = null;
		$column_separator = null;
		$range_from = null;
		$range_to = null;
		
		if ($this->verifyPHPLibraries($error_msg)) {
			$this->getInput($error_msg);
		}
		
		if (!$error_msg) {
			$this->extractTableData($table_text, $column_separator, $range_from, $range_to, $error_msg);
		}
		
		$this->outputData($table_text, $column_separator, $range_from, $range_to, $error_msg);
	}
	
	/**
	 * Lays the parsed rows out on a rectangular grid (honouring colspan and
	 * rowspan) and serialises the grid as delimiter-separated text.
	 *
	 * @param array       $rows             List of rows; each row is a list of cells shaped
	 *                                      array('value' => string[, 'colspan' => n][, 'rowspan' => n]).
	 * @param string|null $table_text       Receives the serialised table, one row per line.
	 * @param mixed       $column_separator Receives the SEPARATOR_* constant that was chosen.
	 * @param string|null $error_msg        Receives a translated message when the rows are unusable.
	 * @return void
	 */
	function extractTableRowsAndColumns($rows, &$table_text, &$column_separator, &$error_msg) {
		$table_array = array();
		$cell_values = array();
		$row_total = count($rows);
		$row_id = 0;
		foreach ($rows as $row) {
			$row_id++;
			if (!isset($table_array[$row_id])) {
				$table_array[$row_id] = array();
			}
			$column_id = 0;
			foreach ($row as $cell) {
				$column_id++;
				// Skip slots already reserved by a rowspan from an earlier row.
				// Placeholders are empty strings, so isset() (not empty()) is required.
				while (isset($table_array[$row_id][$column_id])) $column_id++;
				
				$cell_value = str_replace("\n",'', $cell['value']);
				$cell_values[] = $cell_value;
				
				// The HTML table model caps colspan at 1000; the cap also bounds the
				// work done for hostile input.
				$colspan = empty($cell['colspan']) ? 1 : min(1000, max(1, (int) $cell['colspan']));
				$rowspan = empty($cell['rowspan']) ? 1 : max(1, (int) $cell['rowspan']);
				
				// Reserve the whole rectangle covered by the cell. Rowspan is clipped
				// to the rows that really exist so it cannot create phantom rows.
				for ($r = 0; $r < $rowspan && ($row_id + $r) <= $row_total; $r++) {
					for ($c = 0; $c < $colspan; $c++) {
						$table_array[$row_id + $r][$column_id + $c] = '';
					}
				}
				$table_array[$row_id][$column_id] = $cell_value;
				$column_id += $colspan - 1;
			}
		}
		
		if (empty($cell_values)) {
			$error_msg = JText::_('COM_TABULIZER_INVALID_TABLE_SELECTION');
			return;
		}
		
		// Pick the first separator that does not occur in any cell value.
		$column_separator_found = false;
		$separators = array(',' => SEPARATOR_COMMA,';' => SEPARATOR_SEMICOLON, '^' => SEPARATOR_CARET, '*' => SEPARATOR_ASTERISK, ' '=>SEPARATOR_SPACE); // do not use the tab separator, it's not very handy for this job
		foreach ($separators as $sep => $column_separator) {
			$valid = true;
			foreach ($cell_values as $value) {
				if (empty($value)) continue;
				if (strpos($value,$sep) !== false) { $valid = false; break; }
			}
			if ($valid) {
				$column_separator_found = true;
				break;
			}
		}
		
		# use text enclosure, if needed
		$column_enclosure = '';
		$column_escape = '';
		if (!$column_separator_found) {
			$column_enclosure = '"';
			$column_escape = '"';
			$column_separator = SEPARATOR_COMMA;
			$sep = '","';
		}
		
		$column_count = 0;
		$table_text = '';
		foreach ($table_array as $cells) {
			// Rowspan placeholders may have been inserted before the cells to their
			// left, so restore column order before serialising.
			ksort($cells);
			$current_count = count($cells);
			if (empty($column_count)) {
				$column_count = $current_count;
			} else if ($column_count != $current_count) {
				$error_msg = JText::_('COM_TABULIZER_INVALID_TABLE_SELECTION');
				return;
			}
			if ($column_enclosure !== '') {
				// Escape embedded enclosure characters by doubling them.
				foreach ($cells as $index => $cell_text) {
					$cells[$index] = str_replace($column_enclosure, $column_escape . $column_enclosure, $cell_text);
				}
			}
			$table_text .= $column_enclosure . implode($sep,$cells) . $column_enclosure . "\n";
		}
	}
	
	/**
	 * Finds the innermost <table>...</table> range that contains the caret.
	 *
	 * @param string $contents       Editor HTML.
	 * @param int    $caret_position Caret offset in characters.
	 * @return array|null array('open' => offset of '<table', 'close' => offset of the
	 *                    closing '>'), or null when the caret is not inside a table.
	 */
	private function findEnclosingTableRange($contents, $caret_position) {
		$markers = array();
		
		// Offsets are compared with !== false so that a table at offset 0 is found.
		$pos = mb_stripos($contents, '<table');
		while ($pos !== false) {
			$markers[$pos] = 'open';
			$pos = mb_stripos($contents, '<table', $pos + 6);
		}
		$pos = mb_stripos($contents, '</table');
		while ($pos !== false) {
			$epos = mb_stripos($contents, '>', $pos + 7);
			if ($epos === false) break;
			$markers[$epos] = 'close';
			$pos = mb_stripos($contents, '</table', $epos + 1);
		}
		ksort($markers);
		
		// Pair every closing tag with the most recent unmatched opening tag.
		$open_stack = array();
		$table_ranges = array();
		foreach ($markers as $offset => $type) {
			if ($type == 'open') {
				$open_stack[] = $offset;
			} else if (count($open_stack)) {
				$table_ranges[] = array('open' => array_pop($open_stack), 'close' => $offset);
			}
		}
		// check if you have an unclosed table
		if (count($open_stack)) {
			$table_ranges[] = array('open' => array_pop($open_stack), 'close' => mb_strlen($contents) - 1);
		}
		
		// Among the ranges containing the caret, the one that starts last is the innermost.
		$selected = null;
		foreach ($table_ranges as $table_range) {
			if (($caret_position > $table_range['open']) && ($caret_position < $table_range['close'])) {
				if ($selected === null || $selected['open'] < $table_range['open']) {
					$selected = $table_range;
				}
			}
		}
		return $selected;
	}
	
	/**
	 * Collects the TD/TH cells of one table row.
	 *
	 * @param DOMDocument $dom Document owning the row (used for serialisation).
	 * @param DOMNode     $tr  The row element.
	 * @return array List of array('value' => inner markup[, 'colspan' => ...][, 'rowspan' => ...]).
	 */
	private function extractRowCells($dom, $tr) {
		$cells = array();
		foreach ($tr->childNodes as $td) {
			$tdName = strtoupper($td->nodeName);
			if (($tdName != 'TD') && ($tdName != 'TH')) continue;
			
			// Serialise the children instead of stripping the outer tag with a
			// regex: an empty cell serialises as <td/>, which a regex expecting
			// <td>...</td> cannot match.
			$inner = '';
			foreach ($td->childNodes as $child) {
				$inner .= $dom->saveXML($child);
			}
			$cell = array('value' => trim($inner));
			if ($td->hasAttribute('colspan')) {
				$cell['colspan'] = $td->getAttribute('colspan');
			}
			if ($td->hasAttribute('rowspan')) {
				$cell['rowspan'] = $td->getAttribute('rowspan');
			}
			$cells[] = $cell;
		}
		return $cells;
	}
	
	/**
	 * Locates the table around the caret, parses it and converts it to text.
	 *
	 * Rows are emitted in the order thead, tbody (including rows placed directly
	 * under <table>), tfoot; <caption>, <colgroup> and <col> are ignored.
	 *
	 * @param string|null $table_text       Receives the serialised table.
	 * @param mixed       $column_separator Receives the chosen SEPARATOR_* constant.
	 * @param int|null    $range_from       Receives the offset of the table's '<table'.
	 * @param int|null    $range_to         Receives the offset of the table's closing '>'.
	 * @param string|null $error_msg        Receives a translated message on failure.
	 * @return void
	 */
	function extractTableData(&$table_text, &$column_separator, &$range_from, &$range_to, &$error_msg) {
		mb_internal_encoding ('UTF-8');
		mb_regex_encoding("UTF-8");
		
		$contents = (string) $this->contents;
		
		# search for table limits
		$selected_range = $this->findEnclosingTableRange($contents, $this->caret_position);
		if ($selected_range === null) {
			$error_msg = JText::_('COM_TABULIZER_INVALID_TABLE_SELECTION');
			return;
		}
		$range_from = $selected_range['open'];
		$range_to = $selected_range['close'];
		
		# load and normalize table contents
		$table_source = mb_substr($contents, $range_from, $range_to - $range_from + 1);
		$table_source = str_replace(array("\n", "\r"), " ", $table_source);
		
		$dom = new DOMDocument();
		libxml_use_internal_errors(true);
		// The pseudo XML declaration makes the HTML parser read the fragment as UTF-8.
		$dom->loadHTML('<?xml encoding="UTF-8">' . $table_source);
		libxml_clear_errors();
		
		$table = $dom->getElementsByTagName('table')->item(0);
		if (!$table) {
			$error_msg = JText::_('COM_TABULIZER_INVALID_TABLE_SELECTION');
			return;
		}
		
		$section_by_tag = array('THEAD' => 'head', 'TBODY' => 'body', 'TFOOT' => 'foot');
		$table_elements = array('head'=>array(),'body'=>array(),'foot'=>array());
		
		foreach ($table->childNodes as $node) {
			$nodeName = strtoupper($node->nodeName);
			if (isset($section_by_tag[$nodeName])) {
				foreach ($node->childNodes as $tr) {
					// Only real rows count; text and comment nodes are skipped.
					if (strtoupper($tr->nodeName) == 'TR') {
						$table_elements[$section_by_tag[$nodeName]][] = $this->extractRowCells($dom, $tr);
					}
				}
			} else if ($nodeName == 'TR') {
				// A row directly under <table> belongs to the body.
				$table_elements['body'][] = $this->extractRowCells($dom, $node);
			}
			// CAPTION, COLGROUP, COL and anything else: do nothing
		}
		
		$rows = array();
		foreach ($table_elements as $table_rows) {
			foreach ($table_rows as $table_row) {
				$rows[] = $table_row;
			}
		}
		
		if ($rows) {
			$this->extractTableRowsAndColumns($rows, $table_text, $column_separator, $error_msg);
		} else {
			$error_msg = JText::_('COM_TABULIZER_INVALID_TABLE_SELECTION');
		}
	}
	
	/**
	 * Encodes the result (or the error) as JSON and passes it to jexit().
	 *
	 * @param string|null $table_text       Serialised table.
	 * @param mixed       $column_separator Chosen SEPARATOR_* constant.
	 * @param int|null    $range_from       Start offset of the table in the editor content.
	 * @param int|null    $range_to         End offset of the table in the editor content.
	 * @param string|null $error_msg        When non-empty, only the error is reported.
	 * @return void
	 */
	function outputData($table_text, $column_separator, $range_from, $range_to, $error_msg) {
		if (!empty($error_msg)) {
			$payload = array("error_msg" => $error_msg);
		} else {
			$payload = array("table_text" => $table_text, 'column_separator' => $column_separator, 'range_from' => $range_from, 'range_to' => $range_to);
		}
		
		jexit(json_encode($payload));
	}
	
}

// Run the extraction as soon as the file is loaded. process() always finishes
// by calling outputData(), which hands the JSON response to jexit().
$form = new TableExtraction();
$form->process();

?>
