#!/bin/bash
#
#   Copyright (C) 2006-2018 Jarosław Staniek <staniek@kde.org>
#
#   Based on the original script by Michal Svec <rebel@atrey.karlin.mff.cuni.cz>
#
#   This program is free software; you can redistribute it and/or
#   modify it under the terms of the GNU General Public
#   License as published by the Free Software Foundation; either
#   version 2 of the License, or (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#    General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; see the file COPYING.  If not, write to
#   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
#   Boston, MA 02110-1301, USA.

#
# Generates the transliteration_table.h and transliteration_table.cpp files.
# python-Unidecode is used internally (https://pypi.org/project/Unidecode).
#
# TODO: port to Python; the bash version can take 4 hours to run
#

# Abort on the first unhandled command failure.
set -e

# Cleanup handler: removes the temporary output files.
# Globals: out_cpp_temp, out_h_temp (read; assigned further down in the script).
# Registered for EXIT below, so it runs on every exit path. After a successful
# run the temporary files have already been renamed and "rm -f" is a no-op.
finish() {
    # Quoted and "--"-terminated so a path is never word-split, glob-expanded
    # or parsed as an option.
    rm -f -- "$out_cpp_temp" "$out_h_temp"
}
trap finish EXIT

# Final output files and the temporary files they are built in.
out_cpp="transliteration_table.cpp"
out_h="transliteration_table.h"
out_cpp_temp="${out_cpp}.tmp"
out_h_temp="${out_h}.tmp"
# Highest code point covered; the table gets one entry for each of 0..max.
max=65534

# Declaration shared by the header ("extern ...;") and the definition ("... = {").
decl="const char *const transliteration_table[TRANSLITERATION_TABLE_SIZE + 1]"

# Banner comment placed at the top of both generated files.
# ${0##*/} is the script's file name; unlike an unquoted "basename $0" it
# cannot fail when the script path contains whitespace.
header="/* Transliteration table of $((max + 1)) unicode characters
   Do not edit this file, it is generated
   by ${0##*/} script. */
"

# Header file: banner, table size macro and the extern declaration.
printf '%s\n' "$header
#define TRANSLITERATION_TABLE_SIZE $((max + 1))
extern $decl;" > "$out_h_temp"

# Source file preamble: banner, include, the N shorthand and the opening of the
# initializer list. No trailing newline is written here.
printf '%s' "$header
#include \"$out_h\"
#define N nullptr
$decl = {" > "$out_cpp_temp"

# Fail fast when the transliteration tool is missing. Without it every lookup
# below yields an empty string (the pipeline status is that of sed), so the
# whole table would silently consist of N entries.
if ! command -v unidecode > /dev/null; then
    echo "${0##*/}: unidecode not found, see https://pypi.org/project/Unidecode" >&2
    exit 1
fi

# Generator: for every code point 0..max emit a three-line record:
#   1. the decimal code point
#   2. a C comment holding the four-digit hex code point
#   3. the character itself, or "_" as a placeholder
for ((i = 0; i <= max; i++)); do
    printf -v f '%04x' "$i"
    if ((i < 16 || i == 92)); then
        # Code points 0-15 and the backslash (92) are replaced by "_".
        printf '%d\n/*%s*/\n_\n' "$i" "$f"
    elif ((i < 128 && i != 32)); then
        # Remaining code points below 128, except the space (32): emit the raw
        # byte through a \0nnn octal escape expanded by %b.
        printf -v oct '%03o' "$i"
        printf '%d\n/*%s*/\n%b\n' "$i" "$f" "\\0${oct}"
    else
        # Everything else (including the space) is produced by the external
        # printf from a \uXXXX escape. Its stderr is closed; when it fails,
        # "_" is emitted as the character line instead.
        { /usr/bin/printf "${i}\n/*${f}*/\n\u${f}\n" 2>&- || echo "_"; }
    fi
done |
# Consumer: read the records back and append one initializer per code point.
# Default IFS trimming is kept on purpose so the emitted bytes do not change.
while read -r i && read -r f && read -r ch; do
    if ((i % 8 == 0)); then
        # Start a new output row every 8 entries; only the first entry of a
        # row keeps its /*hex*/ comment.
        if ((i % 320 == 0)); then
            printf '..%d%%' "$((i * 100 / max))" >&2 #progress
        fi
        echo
    else
        f= # <-- comment to add /*numbers*/ everywhere
    fi
    # Transliterate the character and keep alphanumeric characters only.
    # NOTE(review): the first sed expression already deletes "_", so the
    # second expression and the == "_" test below can never match.
    r=$(unidecode -c "${ch}" | sed -r -e 's/[^[:alnum:]]//g;s/_+/_/g')
    if [[ -z "$r" || "$r" == "_" ]]; then
        # No usable transliteration: emit N (defined as nullptr in the preamble).
        printf '%sN/*%s*/,' "$f" "$ch"
    else
        printf '%s"%s"/*%s*/,' "$f" "$r" "$ch"
    fi
done >> "$out_cpp_temp"
# Terminate the progress line. It goes to stderr, the same stream the
# intermediate "..N%" progress markers are written to.
echo ..100% >&2

# Close the initializer list: the final N fills the extra "+ 1" slot of the
# array declaration, then the N shorthand is undefined again.
printf '\nN};\n#undef N\n' >> "$out_cpp_temp"

# Publish the results only now that both files are complete.
mv -- "$out_cpp_temp" "$out_cpp"
mv -- "$out_h_temp" "$out_h"
