#!/bin/env julia

# Base name (without the `.vim` extension) of the keymap file this script writes.
const filename = "latex2unicode_utf-8"

# We want to avoid situations in which the user types e.g. \delt and pauses,
# and the result is "∇t" because "\del" gets recognized and then there is some leftover "t".
# This allows us to get completions with <Tab> for example.
#
# For every input that is a proper prefix of at least one other input:
#   * an extra entry `input * "<Tab>"` is added, mapping to the same characters;
#   * every prefix strictly between `input` and each longer input that is not
#     already a key is mapped to itself (an "incomplete sequence").
#
# Returns a new dictionary; `completions` itself is not modified.
function fix_completions(completions::Dict{String,String})
    allinputs = sort!(collect(keys(completions)))
    new_completions = copy(completions)

    for input in allinputs
        chars = completions[input]
        l = length(input)
        longer = filter(x -> startswith(x, input) && length(x) > l, allinputs)
        isempty(longer) && continue
        new_completions[input * "<Tab>"] = chars
        for other in longer
            for j in (l + 1):(length(other) - 1)
                # `l` and `j` are character counts, so the prefix is taken with
                # `first` (which counts characters). Byte-based indexing such as
                # `other[1:j]` throws a StringIndexError as soon as a multi-byte
                # character precedes position `j`.
                prefix = first(other, j)
                haskey(new_completions, prefix) && continue
                new_completions[prefix] = prefix
            end
        end
    end
    return new_completions
end

# Read `UnicodeData.txt` (looked up relative to `Sys.BINDIR`, in `../../doc`) and
# return a `Dict{UInt32,String}` mapping each code point to a description built
# from fields 2 and 11 of its semicolon-separated line: whichever of the two is
# non-empty, or "field2 / field11" when both are present.
function unicode_data()
    path = normpath(Sys.BINDIR, "..", "..", "doc", "UnicodeData.txt")
    names = Dict{UInt32, String}()
    open(path) do io
        for line in eachline(io)
            fields = split(line, ";")
            id, name, desc = fields[1], fields[2], fields[11]
            # Field 1 is the code point written in hexadecimal without a prefix.
            codepoint = parse(UInt32, string("0x", id))
            if name == ""
                names[codepoint] = desc
            elseif desc == ""
                names[codepoint] = name
            else
                names[codepoint] = "$name / $desc"
            end
        end
    end
    return names
end

# Prepend a dotted circle ('◌' i.e. '\u25CC') to combining characters.
#
# A character is treated as combining when its Unicode category code is 6 or 8
# (presumably utf8proc's Mn and Me categories; confirm against Base.Unicode).
# The result is always a String: either "◌" followed by `char`, or `char` alone.
function fix_combining_chars(char)
    code = Base.Unicode.category_code(char)
    prefix = (code == 6 || code == 8) ? "◌" : ""
    return string(prefix, char)
end

# Build the four columns of the keymap table from `completions`, visiting the
# entries in sorted order. Returns `(latex, code, unicode, desc)`, four parallel
# `Vector{String}`s with one element per completion:
#   latex   - the input sequence, with every backslash doubled
#   code    - the output as concatenated `<char-0xXXXXX>` tokens, or, when the
#             output itself starts with a backslash, that output with every
#             backslash doubled
#   unicode - the output characters (the first one passed through
#             `fix_combining_chars`), or "" for a backslash output
#   desc    - the names found in `unicode_dict`, joined with " + ", or
#             "(Incomplete sequence)" for a backslash output
function table_entries(completions::Dict{String,String}, unicode_dict)
    latex, code, unicode, desc = String[], String[], String[], String[]

    for (input, chars) in sort!(collect(completions))
        if startswith(chars, "\\")
            code_entry = replace(chars, "\\" => "\\\\")
            unicode_entry = ""
            desc_entry = "(Incomplete sequence)"
        else
            points = UInt32[UInt32(c) for c in chars]
            tokens = String["<char-0x$(uppercase(string(p, base = 16, pad = 5)))>" for p in points]
            names = String[get(unicode_dict, p, "(No Unicode name)") for p in points]
            # Only the leading character gets the combining-character treatment.
            glyphs = String[i == 1 ? fix_combining_chars(c) : "$c" for (i, c) in enumerate(chars)]
            code_entry = join(tokens)
            unicode_entry = join(glyphs)
            desc_entry = join(names, " + ")
        end
        push!(latex, replace(input, "\\" => "\\\\"))
        push!(code, code_entry)
        push!(unicode, unicode_entry)
        push!(desc, desc_entry)
    end
    return latex, code, unicode, desc
end

# REPL is a standard library rather than part of Base, so it must be loaded
# explicitly before `REPL.REPLCompletions` can be referenced below.
import REPL

# Write "$filename.vim": a header, the `loadkeymap` table built from the REPL's
# LaTeX and emoji completions, and finally a copy of this script with every
# line prefixed by `"> ` so that it can be recovered from the generated file.
open("$filename.vim", "w") do f
    script = basename(Base.source_path())

    print(f, """
        " This file is autogenerated from the script '$script'
        " The symbols are based on Julia version $VERSION
        " The full generating script can be found in the comments at the bottom of this file,
        " and it can be extracted with:
        "
        "     \$ grep '^\">' $filename.vim | cut -c4- > '$script'
        "
        " To produce this keymap file you need to have Julia compiled from source, and
        " to run `make UnicodeData.txt` inside Julia's `doc` directory.
        " Then you can run:
        "
        "     \$ julia $script
        "

        scriptencoding utf-8

        let b:keymap_name = "L2U"
        loadkeymap

        """)

    col_headers = ["\" Tab completion sequence", "Code point", "Character", "Unicode name"]

    latex, code, unicode, desc =
        table_entries(
            fix_completions(merge(
                REPL.REPLCompletions.latex_symbols,
                REPL.REPLCompletions.emoji_symbols
                )),
            unicode_data()
            )

    # Each column is as wide as its own header or its longest entry, whichever
    # is larger. The header indices follow the argument order of `print_padded`.
    lw = max(length(col_headers[1]), maximum(length, latex))
    cw = max(length(col_headers[2]), maximum(length, code))
    uw = max(length(col_headers[3]), maximum(length, unicode))
    dw = max(length(col_headers[4]), maximum(length, desc))

    # The ` " ` separator starts a Vim comment, so only the first two columns
    # are part of the keymap entry itself.
    print_padded(l, c, u, d) = println(f, rpad(l, lw), " ", rpad(c, cw), " \" ", rpad(u, uw), " : ", d)

    # The rule is written as a Vim comment (`" ` followed by dashes); the two
    # characters of that prefix are subtracted so the rule still spans `lw`.
    print_rule() = print_padded("\" " * "-"^(lw-2), "-"^cw, "-"^uw, "-"^dw)

    print_padded(col_headers...)
    print_rule()

    for (l, c, u, d) in zip(latex, code, unicode, desc)
        print_padded(l, c, u, d)
    end
    print_rule()

    print(f, """

        " Below here is the script that was used to produce this file.

        """)
    # Embed this script; the 3-character `"> ` prefix is what the
    # `grep '^">' ... | cut -c4-` command in the header strips off again.
    for l in eachline(Base.source_path())
        println(f, "\"> ", l)
    end
    println(f)
end
