-- Each database column is stored in a separate mod storage key, so that
-- queries only using some columns only have to load in those columns.
-- There's also a blob type for storing large amounts of data in separate keys,
-- while otherwise behaving exactly the same to the public-facing API.
-- Uses a custom format as it's probably faster than parse_json/deserialize

-- I'm not really happy with this format, since inserts will require rewriting
-- the entire table, but since Squill is not made for big databases it is
-- hopefully good enough. I didn't want to store each row in its own key
-- because that would require making lots of small tables instead of a few big
-- tables.

-- Local aliases for the standard library functions used throughout this file
local byte, find, format, sub = string.byte, string.find, string.format, string.sub
local assert, tonumber, type = assert, tonumber, type

-- squill's internal table; the functions and constants defined in this file
-- are attached to it
local sq = squill._internal
-- Mod storage handle used for every read and write below, also published as
-- sq.storage
local storage = core.get_mod_storage()
sq.storage = storage

-- Column type tags. The tag is the first character of a stored column and is
-- also used as the "type" field of a loaded column table.

-- Numbers: Stored as a list separated by ;, null is represented by empty items
local NUMBERS = 'N'
sq.NUMBERS = NUMBERS

-- Integers: Equivalent to NUMBERS as far as storage.lua is concerned
local INTEGERS = 'I'
sq.INTEGERS = INTEGERS

-- Booleans: One byte per row. Stored as "1" for true, "0" for false, or
-- "-" for null
-- Note that the type tag itself is the character "1"; row data only starts
-- at the second byte of the stored value.
local BOOLEANS = '1'
sq.BOOLEANS = BOOLEANS

-- Strings: Encodes short strings separated by ;.
-- Warning: Does not support binary data.
-- ; is escaped to \:, and \ is escaped to \\. Null is represented by a single
-- backslash (i.e. an invalid string).
local STRINGS = 'S'
sq.STRINGS = STRINGS

-- Blobs: Large data, stored in different mod storage keys. Encoded in base64
-- for compatibility with the legacy "files" mod storage backend, as it's
-- assumed that these probably contain binary data. The column length is stored
-- where the column data would normally be stored.
-- Null values are simply non-existent, and empty strings are stored as "*".
-- Using this in lookups will probably be quite slow.
local BLOBS = 'B'
sq.BLOBS = BLOBS

-- Blob columns are backed by a metatable so that callers can index them like
-- any other column. Row reads are resolved lazily: first from the pending
-- writes in _changed, then from the row's own mod storage key.
-- INTERNAL_NULL marks a pending write of nil inside _changed; it never leaves
-- this file.
local blob_mt = {}
local INTERNAL_NULL = {}
function blob_mt:__index(key)
    -- Only numeric (row) keys are virtual; anything else is simply absent
    if type(key) ~= "number" then return nil end

    -- Pending writes shadow whatever is in storage
    local pending = self._changed[key]
    if pending == INTERNAL_NULL then
        return nil
    elseif pending ~= nil then
        return pending
    end

    local encoded = storage:get_string(self._key_prefix .. key)
    if encoded == "" then
        -- No stored value: null
        return nil
    elseif encoded == "*" then
        -- Empty string (distinct from null)
        return ""
    end

    -- Anything else is base64
    return assert(core.decode_base64(encoded))
end

-- Records a write to a blob row in _changed instead of touching storage.
-- Writing nil is remembered as INTERNAL_NULL so that it can be told apart
-- from "no pending write". Only numeric keys may be assigned this way.
function blob_mt:__newindex(key, value)
    assert(type(key) == "number")
    -- INTERNAL_NULL is a table (always truthy), so and/or is safe here and
    -- a value of false is stored unchanged
    self._changed[key] = value == nil and INTERNAL_NULL or value
end

-- Escape table used when serialising STRINGS columns, and its inverse (keyed
-- by the character that follows the backslash) used when loading them
local string_escapes = {[";"] = "\\:", ["\\"] = "\\\\"}
local string_escapes_reverse = {[":"] = ";", ["\\"] = "\\"}

-- Byte values of "1" and "0", compared against when decoding BOOLEANS columns
local ONE, ZERO = byte("10", 1, -1)

-- txs[db_name][column_key] stores changed columns during a transaction
local txs = {}

-- cache[column_key] stores parsed columns in memory.
-- Columns in here must exactly match what's in storage at all times.
local cache = {}
-- index_cache[column_key] stores the value -> row number tables built by
-- sq.get_unique_index
local index_cache = {}

if not core.settings:get_bool("squill.aggressive_caching") then
    -- Without aggressive caching both caches only hold weak references to
    -- their values, leaving eviction to the garbage collector.
    -- TODO: Maybe do cache evictions manually. Lua probably knows better but
    -- this isn't guaranteed to work at all
    local weak_values = {__mode = "v"}
    setmetatable(cache, weak_values)
    setmetatable(index_cache, weak_values)
end

-- This uses the db+table+column names instead of column IDs as keys so that
-- data can be recovered more easily if the schema table gets corrupted. The
-- performance difference is probably negligible.

-- Loads a column and returns it as a table with "type" and "length" fields
-- and the row values at indexes 1..length (null rows are nil).
-- Sources are tried in this order: the open transaction on db_name, the
-- in-memory cache, and finally mod storage. A column with nothing stored is
-- returned as an empty column of type default_type.
-- If "readonly" is set, the table will be reused for future get_column calls
function sq.get_column(db_name, table_name, col_name, default_type, readonly)
    local key = format("%s/%s/%s", db_name, table_name, col_name)

    -- Columns already changed by the current transaction win over everything
    local tx = txs[db_name]
    local pending = tx and tx[key]
    if pending then
        return pending
    end

    local hit = cache[key]
    if hit then
        -- A writer takes ownership of the table, so it can no longer be
        -- shared through the cache
        if not readonly then
            cache[key] = nil
        end
        return hit
    end

    local raw = storage:get_string(key)
    local tag = sub(raw, 1, 1)
    local column
    if tag == "" then
        -- Nothing stored: empty table
        column = {type = default_type, length = 0}

    elseif tag == NUMBERS or tag == INTEGERS then
        -- Walks the ;-separated list by hand (rather than string.split) to
        -- avoid having to call sub(2) on the whole value first
        column = {type = tag}
        local count, start = 0, 2
        local sep = find(raw, ";", start, true)
        while sep do
            count = count + 1
            column[count] = tonumber(sub(raw, start, sep - 1))
            start = sep + 1
            sep = find(raw, ";", start, true)
        end
        -- The final item has no trailing separator
        count = count + 1
        column[count] = tonumber(sub(raw, start))
        column.length = count

    elseif tag == BOOLEANS then
        -- One byte per row after the tag; bytes other than "1"/"0" are null
        local count = #raw - 1
        column = {type = tag, length = count}
        for row = 1, count do
            local chr = byte(raw, row + 1)
            if chr == ONE then
                column[row] = true
            elseif chr == ZERO then
                column[row] = false
            end
        end

    elseif tag == STRINGS then
        column = sub(raw, 2):split(";", true)
        local count = #column
        column.type = STRINGS
        column.length = count
        for row = 1, count do
            local item = column[row]
            if item == "\\" then
                -- A lone backslash is the null marker
                column[row] = nil
            elseif find(item, "\\", 1, true) then
                -- gsub() is somewhat slow, so only unescape when needed.
                -- The parentheses drop gsub's second return value.
                column[row] = (item:gsub("\\(.)", string_escapes_reverse))
            end
        end

    elseif tag == BLOBS then
        -- Only the length is stored here, rows are fetched lazily by blob_mt
        local count = tonumber(sub(raw, 2))
        column = setmetatable({
            type = BLOBS,
            length = count,
            _old_length = count,
            _changed = {},
            _key_prefix = key .. "/",
        }, blob_mt)

    else
        error(format("Unknown table type %q or outdated squill version: %q",
            tag, key))
    end

    -- It's safe to put blobs in cache if they aren't modified because
    -- _changed will just be empty
    if readonly then
        cache[key] = column
    end

    return column
end

-- Serialises "column" according to column.type and writes the result to mod
-- storage under "key". Empty non-blob columns are stored as an empty string.
-- Blob rows are written to their own "<key>/<row>" keys, and only the column
-- length is stored under "key" itself.
-- Raises an error if column.type is not one of the known column types.
--
-- Mod storage seems to be atomic if you call set_string twice without giving
-- control back to C++, hopefully this doesn't change
local function set_column_raw(key, column)
    local data
    if column.length == 0 and column.type ~= BLOBS then
        -- Do not store empty values
        data = ""
    elseif column.type == NUMBERS or column.type == INTEGERS then
        local strings = {}
        for i = 1, column.length do
            if column[i] == nil then
                -- Null: empty item
                strings[i] = ""
            else
                strings[i] = format("%.17g", column[i])
            end
        end
        data = column.type .. table.concat(strings, ";")

    elseif column.type == BOOLEANS then
        local strings = {}
        for i = 1, column.length do
            if column[i] == nil then
                strings[i] = "-"
            else
                strings[i] = column[i] and "1" or "0"
            end
        end
        data = BOOLEANS .. table.concat(strings, "")

    elseif column.type == STRINGS then
        local strings = {}
        for i = 1, column.length do
            if column[i] == nil then
                -- Null: a lone backslash, which no escaped string can be
                strings[i] = "\\"
            elseif find(column[i], ";", 1, true) or find(column[i], "\\", 1, true) then
                strings[i] = string.gsub(column[i], "[;\\]", string_escapes)
            else
                strings[i] = column[i]
            end
        end
        data = STRINGS .. table.concat(strings, ";")

    elseif column.type == BLOBS then
        local prefix = key .. "/"
        -- Columns created by sq.get_column's blob branch keep their writes
        -- in _changed; a plain table has no _changed and is walked directly
        -- (its non-numeric fields are skipped by the type check below).
        for k, v in pairs(column._changed or column) do
            if type(k) == "number" and k <= column.length then
                if v == INTERNAL_NULL then
                    storage:set_string(prefix .. k, "")
                elseif v == "" then
                    storage:set_string(prefix .. k, "*")
                else
                    storage:set_string(prefix .. k, core.encode_base64(v))
                end
            end
        end

        -- If the column has decreased in size, remove any old values
        if column._old_length then
            for i = column.length + 1, column._old_length do
                storage:set_string(prefix .. i, "")
            end
            -- Storage now holds exactly column.length rows. Remember that so
            -- a later write of this same table cleans up from the right place
            -- if the column shrinks again.
            column._old_length = column.length
        end

        data = column.length > 0 and BLOBS .. column.length or ""
    else
        error(format("Unknown column type %q for %q", column.type, key))
    end

    storage:set_string(key, data)
end

-- Updates the column cache after "column" has been written to storage.
-- Non-blob columns are put back in the cache; blob tables are never cached
-- here because of _changed, so any existing entry is dropped instead.
local function add_to_cache(key, column)
    local cacheable = column.type ~= BLOBS
    cache[key] = cacheable and column or nil
end

-- Stores "column" as the new contents of db_name/table_name/col_name.
-- Inside a transaction the column is only recorded in RAM; otherwise it is
-- written to mod storage straight away and the column cache is updated.
-- If index_updated is true, the cached index value is kept
function sq.set_column(db_name, table_name, col_name, column, index_updated)
    local key = format("%s/%s/%s", db_name, table_name, col_name)

    local tx = txs[db_name]
    if not tx then
        set_column_raw(key, column)
        add_to_cache(key, column)
    else
        tx[key] = column
    end

    -- The caller has not kept the index in sync, so it has to be rebuilt
    if index_updated then return end
    index_cache[key] = nil
end

-- Transactions: Changed columns are just stored in RAM until they are
-- committed or rolled back
--
-- Opens a transaction on db_name. Raises an error if one is already open.
function sq.begin_transaction(db_name)
    local already_open = txs[db_name]
    assert(not already_open, "A transaction is already in progress")
    txs[db_name] = {}

    -- This isn't a good solution but it's better than silently not saving data
    -- if a transaction is accidentally not closed
    local function check_closed()
        assert(not txs[db_name], format("Transaction on database %q not closed", db_name))
    end
    core.after(0, check_closed)
end

-- Writes every column changed by the open transaction on db_name to mod
-- storage and closes the transaction. Raises an error if none is open.
-- If clear_modified_column_cache is set, written columns are removed from the
-- column cache instead of being put back into it. Cached indexes of written
-- columns are always dropped.
function sq.commit_transaction(db_name, clear_modified_column_cache)
    local tx = txs[db_name]
    assert(tx, "No transaction is in progress")

    for key, column in pairs(tx) do
        set_column_raw(key, column)
        if not clear_modified_column_cache then
            add_to_cache(key, column)
        else
            cache[key] = nil
        end
        index_cache[key] = nil
    end

    txs[db_name] = nil
end

-- Discards the open transaction on db_name without writing anything.
-- Raises an error if no transaction is open, unless do_not_error is set.
function sq.rollback_transaction(db_name, do_not_error)
    local tx = txs[db_name]
    assert(tx or do_not_error, "No transaction is in progress")

    if tx then
        -- Clear index_cache for any changed columns as they have possibly been
        -- used by the transaction.
        for key in pairs(tx) do
            index_cache[key] = nil
        end
    end

    txs[db_name] = nil
end

-- Raises an error if a transaction is open on db_name. The error is raised
-- with level 3, so it is reported two call levels above this function.
function sq.assert_no_transaction(db_name)
    if not txs[db_name] then return end
    error("This statement may not be run inside a transaction", 3)
end

-- Indexes. Assumes a UNIQUE column
--
-- Returns a table mapping each non-null value of the column to its row
-- number. A previously cached index is returned as-is; a freshly built one is
-- only stored in the index cache when allow_cache is set.
function sq.get_unique_index(db_name, table_name, col_name, default_type, allow_cache)
    local key = format("%s/%s/%s", db_name, table_name, col_name)

    local index = index_cache[key]
    if index then
        return index
    end

    index = {}
    -- The column is only read here, so request it as readonly
    local column = sq.get_column(db_name, table_name, col_name, default_type, true)
    local rows = column.length
    for row = 1, rows do
        local value = column[row]
        -- Null rows are not indexed
        if value ~= nil then
            index[value] = row
        end
    end

    if allow_cache then
        index_cache[key] = index
    end
    return index
end

-- Intended for benchmarking
-- Empties both the column cache and the index cache in place.
function squill.drop_column_cache()
    for _, tbl in ipairs({cache, index_cache}) do
        -- Assigning nil to existing keys is allowed while traversing
        for k in pairs(tbl) do
            tbl[k] = nil
        end
    end
end
