#!/usr/bin/env php
<?php
error_reporting(E_ALL);

/**
 * Script-wide error handler: print the error with its location and abort with exit status 1.
 *
 * PHP invokes a custom error handler even when the failing call was prefixed with the @
 * error-control operator or when the error level is masked by error_reporting(). In those
 * cases the error is handed back to PHP's built-in handling instead of aborting the script,
 * so deliberately suppressed calls (such as @mkdir() on an existing directory) stay harmless.
 *
 * @param int $errno Level of the raised error (E_* constant)
 * @param string $errstr Error message
 * @param string $errfile File in which the error was raised
 * @param int $errline Line at which the error was raised
 * @return bool false when the error level is currently masked; otherwise does not return
 */
function error_handler($errno, $errstr, $errfile, $errline) {
	// Level not included in the current error_reporting() mask (which is also what
	// the @ operator changes temporarily): let PHP's standard handler deal with it
	if (!(error_reporting() & $errno)) {
		return false;
	}
	echo "$errstr at $errfile:$errline\n";
	exit(1);
}
set_error_handler('error_handler');

// Configuration
$english_path = 'en-US-new'; // New English locale
$locale_path = 'locales'; // Source locales from run script
$content_locale_path = 'content-locales'; // Existing content locales from previous version
$output_dir = 'output/locale'; // Directory to save merged locales
$content_output_dir = 'output/content'; // Directory to save merged content locales
$use_content_input_dir = false; // if true, read locales.xml from $content_locale_path/csl/
$use_content_output_dir = true; // set to true for XPI, false for BZ
$localeCodeInOutputXML = true; // set to true for XPI, false for BZ

// Locale directory names: two lowercase letters, optionally followed by -XX (e.g. "fr", "pt-BR")
$locale_re = '/^([a-z]{2})(\-[A-Z]{2})?$/';
// Locale files to merge: .dtd and .properties files starting with a lowercase letter
$locale_file_re = '/^[a-z].+\.(dtd|properties)$/';

// Zotero files
$english_files = array_filter(
	scandir($english_path),
	fn ($path) => preg_match($locale_file_re, $path)
);
// Mozilla files
$english_mozilla_files = array_filter(
	scandir($english_path . "/mozilla"),
	fn ($path) => preg_match($locale_file_re, $path)
);
$english_mozilla_files = array_map(fn ($path) => 'mozilla/' . $path, $english_mozilla_files);
$all_english_files = array_merge($english_files, $english_mozilla_files);

// Make output directories for each locale. Existing directories are skipped explicitly
// rather than with @mkdir(): the custom error handler is invoked even for @-suppressed
// calls, so a rerun over an existing output tree must not call mkdir() at all.
$locales = array_filter(scandir($locale_path), fn ($path) => preg_match($locale_re, $path));
foreach ($locales as $locale) {
	$dir = $output_dir . '/' . $locale . '/zotero/';
	if (!is_dir($dir)) {
		mkdir($dir, 0775, true);
	}
}

// Make content output directory for CSL files
if ($use_content_output_dir && !is_dir("$content_output_dir/csl/")) {
	mkdir("$content_output_dir/csl/", 0775, true);
}

foreach ($all_english_files as $file) {
	echo "Processing $file\n";
	
	$extension = substr(strrchr($file, '.'), 1);
	
	// $locales only contains names that fully match $locale_re, so each entry can be
	// used as the locale code directly
	foreach ($locales as $locale) {
		$locale_source_file = "$locale_path/$locale/zotero/$file";
		$output_locale_dir = "$output_dir/$locale/zotero/";
		
		// Mozilla files are saved in a mozilla/ subdirectory of the locale's output directory
		if (!is_dir($output_locale_dir . 'mozilla')) {
			mkdir($output_locale_dir . 'mozilla', 0755, true);
		}
		
		if ($file == 'locales.xml') {
			// Two-letter codes are expanded by repeating the code in upper case ("fr" -> "fr-FR")
			if (strlen($locale) == 2) {
				$csl_locale = $locale . '-' . strtoupper($locale);
			}
			else {
				$csl_locale = $locale;
			}
			
			if ($use_content_input_dir) {
				$locale_source_file = "$content_locale_path/csl/locales-$csl_locale.xml";
			}
			
			if ($use_content_output_dir) {
				$output_locale_dir = "$content_output_dir/csl/";
			}
			
			if ($localeCodeInOutputXML) {
				$save_file = "locales-$csl_locale.xml";
			}
			else {
				$save_file = "locales.xml";
			}
			
			echo "Saving {$output_locale_dir}{$save_file}\n";
			
			$string = generate_csl_locale(
				"$english_path/$file",
				$locale_source_file,
				$locale
			);
		}
		else {
			echo "Saving {$output_locale_dir}{$file}\n";
			
			$save_file = $file;
			
			$string = generate_locale(
				$extension,
				"$english_path/$file",
				$locale_source_file,
				$locale,
				// Preserve whitespace in Mozilla files
				str_contains($file, 'mozilla')
			);
		}
		
		// We can't handle this file, so bail
		if (!$string) {
			throw new Exception("Error generating file");
		}
		
		file_put_contents($output_locale_dir . $save_file, $string);
	}
}


/**
 * Read a .dtd or .properties file and index its entries by key.
 *
 * Each returned value is the full regex match array for that entry (whole match at index 0,
 * capture groups after it), so callers can reuse the captured whitespace as well as the string.
 *
 * A missing file normally raises an exception. The one exception is updates.dtd for a fixed
 * list of locales: when $locale is one of them, the message is printed and an empty array
 * is returned instead.
 *
 * @param string $type File extension: 'dtd' or 'properties'
 * @param string $file Path of the file to parse
 * @param string|null $locale Locale code, used only for the missing-file allowance
 * @return array Match arrays keyed by entity/property name
 * @throws Exception If the file is missing (and not tolerated) or the type is unsupported
 */
function parse_strings($type, $file, $locale = null) {
	if (!file_exists($file)) {
		$message = "Source file $file doesn't exist";
		$locales_without_updates = ['br', 'en-GB', 'gl-ES', 'hu-HU', 'mn-MN'];
		$tolerated = $locale
			&& str_contains($file, 'updates.dtd')
			&& in_array($locale, $locales_without_updates);
		if (!$tolerated) {
			throw new Exception($message);
		}
		echo $message . "\n";
		return [];
	}
	
	if ($type == 'dtd') {
		// Capture groups:
		// 1: space after ENTITY
		// 2: key
		// 3: space after key
		// 4: string
		// 5: space before >
		// 6: newlines
		$pattern = '/<!ENTITY(\s*)([^\s]*)(\s*)"([^"]*)"(\s*)> *(\n*)(\n|$)/s';
		$key_group = 2;
	}
	elseif ($type == 'properties') {
		// Capture groups:
		// 1: key
		// 2: string
		// 3: trailing whitespace
		$pattern = '/([^\s]*)\s*= *([^\n]*)(\s*)(\n|$)/s';
		$key_group = 1;
	}
	else {
		throw new Exception("Unsupported extension .$type");
	}
	
	preg_match_all($pattern, file_get_contents($file), $entries, PREG_SET_ORDER);
	
	// Later entries with the same key overwrite earlier ones
	$by_key = [];
	foreach ($entries as $entry) {
		$by_key[$entry[$key_group]] = $entry;
	}
	return $by_key;
}



/**
 * Merge a localized .dtd or .properties file with the English version of the same file.
 *
 * The output contains exactly the keys of the English file, in English order. For each key
 * the localized string is used when the locale file has a non-empty one; otherwise the
 * English string is used. A leading copyright block in the locale file is kept at the top.
 *
 * @param string $type File extension: 'dtd' or 'properties'
 * @param string $english_file Path of the English file
 * @param string $locale_file Path of the localized file
 * @param string $locale Locale code, passed through to parse_strings()
 * @param bool $preserveWhitespace For DTD files, keep the locale file's spacing between
 *     the parts of each entity instead of normalizing it to single spaces
 * @return string|false Merged file contents, or false for an unsupported type
 * @throws Exception If the English file yields no entries, or from parse_strings()
 */
function generate_locale($type, $english_file, $locale_file, $locale, $preserveWhitespace = false) {
	$output = '';
	
	// Keep copyright block at top of Mozilla files. Only read the locale file if it exists:
	// parse_strings() below decides whether a missing file is acceptable, and reading a
	// missing file here would raise a warning before it gets the chance.
	if (file_exists($locale_file)) {
		preg_match('/^(<!--.+?-->|# This Source Code.+?2.0\/\.)\s+/s', file_get_contents($locale_file), $matches);
		if ($matches) {
			$output .= $matches[0];
		}
	}
	
	$english_pairs = parse_strings($type, $english_file);
	if (!$english_pairs) {
		throw new Exception("No English pairs!");
	}
	
	$locale_pairs = parse_strings($type, $locale_file, $locale);
	
	foreach ($english_pairs as $key => $english_val) {
		// Keys missing from the locale file fall back to the English entry
		$locale_val = $locale_pairs[$key] ?? $english_val;
		
		switch ($type) {
			case 'dtd':
				// Don't replace string with space, only truly empty string, or else we mess up
				// zotero.merge.of in Estonian, which apparently doesn't exist.
				// Compared with === rather than empty() so that a translation of "0" is kept.
				if ($locale_val[4] === '') {
					$locale_val = $english_val;
				}
				
				// Keep spacing between components
				if ($preserveWhitespace) {
					$prefix = '<!ENTITY' . $locale_val[1];
					$middle = $locale_val[3];
					$string = '"' . $locale_val[4] . '"';
					$suffix = $locale_val[5] . ">" . $english_val[6];
				}
				else {
					$prefix = '<!ENTITY ';
					$middle = " ";
					$string = '"' . $locale_val[4];
					$suffix = '">' . $english_val[6];
				}
				break;
			
			case 'properties':
				// If empty value (nothing but whitespace), use English. Compared with ===
				// rather than empty() so that a value of "0" is kept.
				if (trim($locale_val[2]) === '') {
					$locale_val = $english_val;
				}
				
				$prefix = '';
				$middle = '=';
				$string = $locale_val[2];
				// Trailing whitespace always follows the English file
				$suffix = $english_val[3];
				break;
			
			default:
				echo "Unsupported extension $type\n";
				return false;
		}
		
		$output .= $prefix . $key . $middle . $string . $suffix . "\n";
	}
	
	return trim($output) . "\n";
}


/**
 * Merge a localized CSL locales XML file with the English version.
 *
 * The output has one <term> for every term under <locale> in the English file, in English
 * order. The localized text is used where the locale file has a term with the same name
 * and form; otherwise the English text is used. If the locale file is missing or empty,
 * a message is printed and the English file contents are returned unchanged.
 *
 * @param string $english_file Path of the English locales XML file
 * @param string $locale_file Path of the localized locales XML file
 * @param string $locale Locale code; its first two characters become the xml:lang value
 * @return string Pretty-printed XML document, or the raw English file contents as fallback
 */
function generate_csl_locale($english_file, $locale_file, $locale) {
	$english_str = file_get_contents($english_file);
	$english_sxe = new SimpleXMLElement($english_str);
	
	// Check for the file before reading it: file_get_contents() on a missing path raises
	// a warning, which would pre-empt the English fallback below
	$str = is_file($locale_file) ? file_get_contents($locale_file) : false;
	if (!$str) {
		echo "Locale version of locales.xml not found\n";
		return $english_str;
	}
	$locale_sxe = new SimpleXMLElement($str);
	
	$xw = new XMLWriter();
	$xw->openMemory();
	$xw->startDocument('1.0', 'UTF-8');
	$xw->startElement('terms');
	$xw->writeAttribute('xmlns', 'http://purl.org/net/xbiblio/csl');
	$xw->startElement('locale');
	$xw->writeAttribute('xml:lang', substr($locale, 0, 2));
	
	// The XPath lookups below need an explicit prefix for the CSL namespace
	$locale_sxe->registerXPathNamespace('csl', 'http://purl.org/net/xbiblio/csl');
	
	foreach ($english_sxe->locale->term as $term) {
		$name = $term->attributes()->name;
		$form = $term->attributes()->form;
		
		// Find the localized term with the same name and the same form (or no form)
		if ($form) {
			$found = $locale_sxe->xpath("//csl:term[@name='$name' and @form='$form']");
		}
		else {
			$found = $locale_sxe->xpath("//csl:term[@name='$name' and not(@form)]");
		}
		
		// Fall back to the English term if the locale has no matching term
		$node = isset($found[0]) ? $found[0] : $term;
		
		$xw->startElement('term');
		$xw->writeAttribute('name', (string) $name);
		if ($form) {
			$xw->writeAttribute('form', (string) $form);
		}
		
		if (count($term->children()) > 0) {
			// English term has child elements: write <single> and <multiple>, each
			// falling back to English individually
			$xw->startElement('single');
			$xw->text((string) ($node->single ? $node->single : $term->single));
			$xw->endElement();
			$xw->startElement('multiple');
			$xw->text((string) ($node->multiple ? $node->multiple : $term->multiple));
			$xw->endElement();
		}
		else {
			// If original had children and we don't any longer, use English
			if (count($node->children()) > 0) {
				$xw->text((string) $term);
			}
			// Otherwise use the locale string
			else {
				$xw->text((string) $node);
			}
		}
		$xw->endElement(); // </term>
	}
	
	$xw->endElement(); // </locale>
	$xw->endElement(); // </terms>
	$str = $xw->outputMemory(true);
	
	// Round-trip through DOM to get indented output
	$doc = new DOMDocument('1.0');
	$doc->formatOutput = true;
	$doc->loadXML($str);
	return $doc->saveXML();
}
?>
