#!/usr/bin/perl

# builds charm2.stc from charm2.dat and the result of the SIMBATCH query
#
# the SIMBATCH query should be located in a file 'simbad.txt' in the current
# directory. The file 'idlist.txt' (generated by charm2simbatch.pl) should also
# be located in the current directory.
#
# This script requires as a parameter the path to the Celestia directory.
# The charm2.stc file is output into the current directory.

use File::Spec::Functions;
use Math::Trig;

$KM_PER_LY = 9460730472580.8;
$MAS_PER_RAD = 648000000 / pi;

die "Syntax: perl charm2.pl <path-to-celestia>\n" if($#ARGV != 0);

$PATH_TO_CELESTIA = $ARGV[0];
$SIMBATCH_RESULT_FILE = 'simbad.txt';
$IDLIST_FILE = 'idlist.txt';
$CHARM2_FILE = 'charm2.dat';
$STC_FILE = 'charm2.stc';

# build a table of HIP identifiers for each ID

%HIPids = (); # key = id, value = HIP
%distances = (); # distances in light years

open SIMBAD, '<', $SIMBATCH_RESULT_FILE or die "Cannot open file $SIMBATCH_RESULT_FILE for reading.\n";

# skip SIMBATCH file header to the data section

while($curLine = <SIMBAD>) {
	chomp $curLine;
	last if($curLine =~ m/^::data:*$/);
}

$curLine = <SIMBAD>;

open IDLIST, '<', $IDLIST_FILE or die "Cannot open file $IDLIST_FILE for reading.\n";

while($curLine = <SIMBAD>) {
	chomp $curLine;
	last if(!($curID = <IDLIST>)); # end if we've run out of IDs
	next if($curLine eq ''); # skip if there is not HIP for this ID
	chomp $curID;
	if($curLine =~ m/^HIP ([0-9]+)$/) {
		$HIPids{$curID} = $1;
		$distances{$1} = 0;
	}
}

close SIMBAD;
close IDLIST;

%baryDistance = (); # barycenter distances
%nameAlias = (); # list of aliases
%orbitSize = (); # list of orbit semimajor axes

$fileName = catfile($PATH_TO_CELESTIA, 'data', 'starnames.dat');

open NAMES, '<', $fileName or die "Cannot open file $fileName for reading.\n";

while($curLine = <NAMES>) {
	chomp $curLine;
	if($curLine =~ m/^([0-9]+):(.*)$/) {
		$HIP = $1; # prevent the regular expression matching in split
		$nL = $2;  # from destroying these values
		@nameList = split(/:/, $nL);
		for($i = 0; $i <= $#nameList; $i++) {
			$nameAlias{uc($nameList[$i])} = "HIP $HIP";
		}
	}
}

close NAMES;

# load stars.dat to get distances

$fileName = catfile($PATH_TO_CELESTIA, 'data', 'stars.dat');
open STARS, '<', $fileName or die "Cannot open file $fileName for reading.\n";
binmode STARS;

# read file header: test whether this is a star database and get number of stars

read(STARS, $buf, 14);

($fileType, $version, $nRecords) = unpack('A8SL', $buf);

die "Bad stars.dat format" if(($fileType ne 'CELSTARS') || ($version != 0x0100));

for($i = 0; $i < $nRecords; $i++) {
	read(STARS, $buf, 20);
	($HIP, $x, $y, $z) = unpack('Lfffx4', $buf); # don't need magnitude or spectral type
	if(exists $distances{$HIP}) {
		$distances{$HIP} = sqrt($x * $x + $y * $y + $z * $z);
	}
}

close STARS;

# parse stc files to update distances

# list of datafiles in the order that is specified in celestia.cfg
# TODO: could load this from celestia.cfg
@fileList = ('revised.stc', 'extrasolar.stc', 'nearstars.stc', 'visualbins.stc', 'spectbins.stc');

for($i = 0; $i <= $#fileList; $i++) {
	$fileName = catfile($PATH_TO_CELESTIA, 'data', $fileList[$i]);
	open STC, '<', $fileName or die "Cannot open file $fileName for reading.\n";
	@stcFile = <STC>;
	close STC;
	for($j = 0; $j <= $#stcFile; $j++) {
		chomp $stcFile[$j]; # remove newlines
		$stcFile[$j] =~ s/^\s+//; # remove leading spaces
		$stcFile[$j] =~ s/\s*(?:#.*)?$//; # remove trailing spaces & comments
	}
	$stc = join(' ', @stcFile);
	@stcFile = ();
	$stc =~ s/\s+/ /g;
	# set up states
	$name = '';
	$HIP = '';
	$isModify = 0;
	$isBarycenter = 0;
	$blockLevel = 0;
	$curItem = 0;
	while(1) {
		# get next token
		$stc =~ s/^\s+//; # skip whitespace
		last if($stc eq '');
		$tok = substr($stc, 0, 1);
		$stc = substr($stc, 1);
		if($tok eq '"') {
			# we have a quoted token, extract it and handle escapes.
			# there's probably a way to do this with regular expressions
			# but I can't figure it out, so we do it this way
			while(substr($stc, 0, 1) ne '"') {
				if(substr($stc, 0, 1) eq '\\') {
					# handle character escapes
					$tok .= substr($stc, 0, 2);
					$stc = substr($stc, 2);
				} else {
					$tok .= substr($stc, 0, 1);
					$stc = substr($stc, 1);
				}
			}
			$tok .= substr($stc, 0, 1);
			$stc = substr($stc, 1);
		} elsif($tok eq '[') {
			# we have a vector
			$stc =~ s/^(.*?)\]//;
			$tok .= $1;
		} else {
			# unquoted token
			$stc =~ s/^(.*?)\s//;
			$tok .= $1;
		}
		# parse token
		if($tok eq '{') {
			# increase block level
			$blockLevel++;
			$curItem = 0;
			if($blockLevel > 0) {
				$curProperty = '';
			}
			if($blockLevel == 1) {
				$distance = '';
				$baryRef = '';
			}
		} elsif($tok eq '}') {
			# decrease block level
			$blockLevel--;
			$curItem = 0;
			if($blockLevel == 0) {
				# process previously-parsed star
				if($isBarycenter) {
					# add barycenter to names list
					$name = uc((($HIP ne '') ? ('HIP ' . $HIP . ':') : '') . $name);
					$baryRef = uc($baryRef);
					@nameList = split(/:/, $name);
					if($baryRef eq '') {
						$baryDistance{$nameList[0]} = $distance;
						for($j = 0; $j <= $#nameList; $j++) {
							$nameAlias{$nameList[$j]} = $nameList[0];
						}
					} elsif(exists $nameAlias{$baryRef}) {
						for($j = 0; $j <= $#nameList; $j++) {
							$nameAlias{$nameList[$j]} = $nameAlias{$baryRef};
						}
					} else {
						print "Unresolvable barycenter name: $baryRef\n";
					}
				}
				if(($HIP ne '') && (exists $distances{$HIP})) {
					if($isBarycenter) {
						# if a CHARM2 star gets replaced by a barycenter,
						# we drop it from the lists
						# TODO: handle modify on barycenters
						delete $distances{$HIP};
						@idList = keys %HIPids;
						for($j = 0; $j <= $#idList; $j++) {
							if($HIPids{$idList[$j]} == $HIP) {
								delete $HIPids{$idList[$j]};
							}
						}
						@idList = ();
					} elsif($baryRef eq '') {
						$distances{$HIP} = $distance if($distance ne '');
					} else {
						$distances{$HIP} = $baryDistance{$nameAlias{uc($baryRef)}};
					}
				}
				# reset tokenizer
				$name = '';
				$HIP = '';
				$isModify = 0;
				$isBarycenter = 0;
			}
		} elsif($blockLevel == 0) {
			# get star identifiers
			# TODO: check whether terms appear in the right order and
			# that they don't appear multiple times
			if($tok =~ m/^Barycent(?:er|re)$/) {
				$isBarycenter = 1;
			} elsif($tok eq 'Modify') {
				$isModify = 1;
			} elsif($tok =~ m/^[0-9]+$/) {
				$HIP = $tok;
			} elsif(substr($tok, 0, 1) eq '"') {
				$name = substr($tok, 1, length($tok) - 2);
			} else {
				print "BAD FORMAT IN FILE $fileName\n";
			}
		} elsif($blockLevel == 1) {
			# read name or value
			if($curItem == 0) {
				$curProperty = $tok;
			} else {
				if($curProperty eq 'Distance') {
					if($tok =~ m/^[0-9]+(\.[0-9]+)?$/) {
						$distance = $tok;
					} else {
						die "BAD FORMAT IN FILE $fileName\n";
					}
				} elsif($curProperty =~ m/^OrbitBarycent(?:er|re)$/) {
					if(substr($tok, 0, 1) eq '"') {
						$baryRef = substr($tok, 1, length($tok) - 2);
					} else {
						die "BAD FORMAT IN FILE $fileName\n";
					}
				}
			}
			$curItem = $curItem^1;
		}
	}
}

# release memory
%nameAlias = ();
%baryDistance = ();

# set up UD, LD totals
%UD_total = ();
%UD_weight = ();
%LD_total = ();
%LD_weight = ();
%Dtypes = ();

# parse CHARM2
open CHARM2, '<', $CHARM2_FILE or die "Cannot open file $CHARM2_FILE for reading.\n";

while($curLine = <CHARM2>) {
	chomp $curLine;
	next if($curLine eq '');
	$Source = substr($curLine, 30, 24);
	$Source =~ s/\s//g;
	next if(!exists $HIPids{$Source});
	$HIP = $HIPids{$Source};
	next if($distances{$HIP} == 0); # if there's no distance measurement, skip
	$Type = Trim(substr($curLine, 25, 4));
	next if($Type eq 'UR'); # unresolved diameter, so skip
	$UD   = Trim(substr($curLine, 143, 6));
	$e_UD = Trim(substr($curLine, 150, 5));
	$LD   = Trim(substr($curLine, 162, 6));
	$e_LD = Trim(substr($curLine, 169, 4));
	next if(($UD eq '') && ($LD eq '')); # no diameters in record, skip
	$e_UD = 100 if(($UD ne '') && ($e_UD <= 0));
	$e_LD = 100 if(($LD ne '') && ($e_LD <= 0));
	if($UD ne '') {
		if(!exists $UD_total{$HIP}) {
			$UD_total{$HIP} = 0;
			$UD_weight{$HIP} = 0;
		}
		$Dtypes{$HIP} = 1 if(!exists $Dtypes{$HIP});
		$UD_total{$HIP} += $UD / ($e_UD * $e_UD);
		$UD_weight{$HIP} += 1 / ($e_UD * $e_UD);
		$Dtypes{$HIP} |= 1;
	}
	if($LD ne '') {
		if(!exists $LD_total{$HIP}) {
			$LD_total{$HIP} = 0;
			$LD_weight{$HIP} = 0;
		}
		$Dtypes{$HIP} = 2 if(!exists $Dtypes{$HIP});
		$LD_total{$HIP} += $LD / ($e_LD * $e_LD);
		$LD_weight{$HIP} += 1 / ($e_LD * $e_LD);
		$Dtypes{$HIP} |= 2;
	}
}

close CHARM2;

# select diameter type with smallest error
foreach $HIP (keys %Dtypes) {
	if($Dtypes{$HIP} == 3) {
		if($UD_weight{$HIP} > $LD_weight{$HIP}) {
			delete $LD_total{$HIP};
			delete $LD_weight{$HIP};
			$Dtypes{$HIP} = 1;
		} else {
			delete $UD_total{$HIP};
			delete $UD_weight{$HIP};
			$Dtypes{$HIP} = 2;
		}
	}
}

open STC, '>', $STC_FILE or die "Cannot open file $STC_FILE for writing.\n";

print STC <<END;
# Celestia star radii version 1.2 (21/09/2008)
# Generated by Andrew Tribick from:
#
# CHARM2: an updated Catalog of High Angular Resolution Measurements
# Richichi A., Percheron I., Khristoforva M., A&A 431, 773 (2005)
#
# Celestia datafiles:
# revised.stc, extrasolar.stc, nearstars.stc, visualbins.stc, spectbins.stc
#
# Radii generated from weighted averages of multiple entries in the CHARM2
# catalog. Where uncertainties on the angular values were not supplied, the
# error is assumed to be 100 mas. In cases where both limb darkened and uniform
# disk radii are available, the one with the lowest error is chosen.

END

# output .stc file
foreach $HIP (sort { $a <=> $b } keys %Dtypes) {
	if($Dtypes{$HIP} == 1) {
		$angDiam = $UD_total{$HIP} / $UD_weight{$HIP};
	} else {
		$angDiam = $LD_total{$HIP} / $LD_weight{$HIP};
	}
	$radius = $angDiam / 2 * $distances{$HIP};
	$radius = $radius * ($KM_PER_LY / $MAS_PER_RAD);
	print STC "\nModify $HIP\n";
	print STC "{\n";
	print STC "\tRadius " . sprintf('%.0f', sprintf('%.4g', $radius));
	print STC " # " . (($Dtypes{$HIP} == 1) ? 'Uniform disk' : 'Limb darkened') . " angular diameter = ";
	print STC sprintf("%.2f mas\n", $angDiam);
	print STC "}\n";
}

close STC;

sub Trim {
	$st = shift;
	$st =~ s/^\s+//;
	$st =~ s/\s+$//;
	return $st;
}
