--- Async worker entrypoints for map_octree_lib.
-- Loaded in async environments via core.register_async_dofile().
-- These functions run in separate threads for parallel map processing.

-- Namespace table for the entrypoints defined in this file. It is assigned
-- without `local`, so it becomes a global of whichever environment loads this
-- file and can be reached there by name.
map_octree_async = {}

-- Forward declarations of the file-local helpers. The public entrypoints below
-- refer to these names before the corresponding function bodies are assigned
-- further down; declaring them here makes those references resolve to locals
-- (upvalues) instead of globals.
local build_batch_blobs_impl
local benchmark_build_batch_impl
local collect_content_ids_iter
local collect_ids_recursive
local async_now_us
local quantile
local run_benchmark_once

--#region types
---@class BatchBenchmarkResult
---@field p50_us number|nil
---@field p95_us number|nil
---@field blob_bytes integer
---@field samples_us number[]
---@field chunks integer
--#endregion


---Protected entrypoint that builds and serializes every chunk tree of a batch.
---
---All arguments are forwarded unchanged to `build_batch_blobs_impl` under
---`pcall`, so an error raised while building is reported as a return value
---instead of propagating to the caller.
---@param manip userdata VoxelManip object
---@param emerged_pos1 vector
---@param emerged_pos2 vector
---@param batch_minp vector
---@param trees_x integer
---@param trees_y integer
---@param trees_z integer
---@return boolean ok `true` when the build finished, `false` when it raised
---@return table<integer, {[1]: string, [2]: integer|nil, [3]: table<integer, string>}>|string payload built entries on success, stringified error otherwise
function map_octree_async.build_batch_blobs(manip, emerged_pos1, emerged_pos2, batch_minp, trees_x, trees_y, trees_z)
    local succeeded, payload = pcall(
        build_batch_blobs_impl,
        manip, emerged_pos1, emerged_pos2, batch_minp, trees_x, trees_y, trees_z
    )

    if not succeeded then
        -- The error value may be any Lua value; always hand back a string.
        return false, tostring(payload)
    end
    return true, payload
end



---Protected entrypoint that times the build+serialize work for a batch.
---
---Only timings and size totals are returned, never the blobs themselves, so
---the result stays small. All arguments are forwarded unchanged to
---`benchmark_build_batch_impl` under `pcall`.
---@param manip userdata VoxelManip object
---@param emerged_pos1 vector
---@param emerged_pos2 vector
---@param batch_minp vector
---@param trees_x integer
---@param trees_y integer
---@param trees_z integer
---@param warmup integer|nil number of untimed passes (the implementation defaults it to 1)
---@param samples integer|nil number of timed passes (the implementation defaults it to 3)
---@return boolean ok `true` when the benchmark finished, `false` when it raised
---@return BatchBenchmarkResult|string payload benchmark summary on success, stringified error otherwise
function map_octree_async.benchmark_build_batch(manip, emerged_pos1, emerged_pos2, batch_minp, trees_x, trees_y, trees_z, warmup, samples)
    local succeeded, payload = pcall(
        benchmark_build_batch_impl,
        manip, emerged_pos1, emerged_pos2, batch_minp, trees_x, trees_y, trees_z,
        warmup, samples
    )

    if not succeeded then
        -- The error value may be any Lua value; always hand back a string.
        return false, tostring(payload)
    end
    return true, payload
end



--- Best-effort memory cleanup hook.
--- Runs two full garbage-collection cycles back to back. This can shrink the
--- Lua heap of the calling environment, but it gives no guarantee that the OS
--- reclaims the process's resident memory.
function map_octree_async.flush()
    -- NOTE(review): the second cycle is presumably there to reclaim objects
    -- that only became collectable after the first cycle ran finalizers.
    for _ = 1, 2 do
        collectgarbage("collect")
    end
end



---Collect the content IDs of a tree using an explicit stack (no recursion).
---
---Visits `root` and every node reachable through the `CHILDREN` field. For each
---content ID that has no truthy entry in `out_ids` yet, `get_name` is called
---once and its result is stored under that ID.
---
---`stack` is caller-owned scratch space that may be reused across calls. Every
---slot this function writes is cleared again when the node is popped, so the
---stack does not keep nodes (and the subtrees hanging off them) of an already
---processed tree reachable for the garbage collector.
---@param root OctNode
---@param out_ids table<integer, string> filled in place: content ID -> name
---@param stack OctNode[] scratch stack; slots used by this call are cleared on pop
---@param CHILDREN integer node field index holding the children table
---@param ID integer node field index holding the content ID
---@param get_name fun(cid: integer): string
function collect_content_ids_iter(root, out_ids, stack, CHILDREN, ID, get_name)
    local sp = 1
    stack[1] = root
    while sp > 0 do
        local node = stack[sp]
        -- Drop the reference right away: a stale slot would pin this node and
        -- its whole subtree until the slot happens to be overwritten.
        stack[sp] = nil
        sp = sp - 1

        local cid = node[ID] --[[@as integer]]
        if not out_ids[cid] then
            out_ids[cid] = get_name(cid)
        end

        -- Anything that is not a table in the CHILDREN field means "leaf".
        local children = node[CHILDREN]
        if type(children) == "table" then
            for _, child in pairs(children) do
                sp = sp + 1
                stack[sp] = child
            end
        end
    end
end



---Collect the content IDs of a tree by depth-first recursion.
---
---Records the ID of `octnode` itself, then recurses into every entry of its
---`CHILDREN` field when that field holds a table. `get_name` is only called
---for IDs that have no truthy entry in `content_ids` yet.
---@param octnode OctNode
---@param content_ids table<integer, string> filled in place: content ID -> name
---@param ID integer node field index holding the content ID
---@param CHILDREN integer node field index holding the children table
---@param get_name fun(cid: integer): string
function collect_ids_recursive(octnode, content_ids, ID, CHILDREN, get_name)
    local node_cid = octnode[ID] --[[@as integer]]
    if not content_ids[node_cid] then
        content_ids[node_cid] = get_name(node_cid)
    end

    local kids = octnode[CHILDREN]
    if type(kids) ~= "table" then
        -- Leaf: nothing below this node.
        return
    end
    for _, kid in pairs(kids) do
        collect_ids_recursive(kid, content_ids, ID, CHILDREN, get_name)
    end
end



---Return a timestamp in microseconds.
---
---Uses `core.get_us_time()` when the `core` global exposes it as a function.
---Otherwise falls back to `os.clock()` scaled to microseconds and floored;
---note that `os.clock()` reports CPU time used by the program, not wall time.
---@return integer
function async_now_us()
    local engine = core
    if not engine or type(engine.get_us_time) ~= "function" then
        return math.floor(os.clock() * 1000000)
    end
    return engine.get_us_time()
end



---Pick the `q`-quantile from an ascending-sorted list (no interpolation).
---
---The element index is `(n - 1) * q + 1` rounded to the nearest integer
---(halves round up), then clamped to `[1, n]`, so the result is always one of
---the input values.
---@param sorted_values number[] values sorted in ascending order
---@param q number quantile, normally in `[0, 1]`; out-of-range values are clamped by index
---@return number|nil value `nil` when the list is empty
function quantile(sorted_values, q)
    local count = #sorted_values
    if count == 0 then
        return nil
    elseif count == 1 then
        return sorted_values[1]
    end

    -- Adding 1.5 before flooring combines the shift to 1-based indexing
    -- with round-half-up.
    local rank = math.floor((count - 1) * q + 1.5)
    if rank > count then
        rank = count
    elseif rank < 1 then
        rank = 1
    end
    return sorted_values[rank]
end



---Run one full build+serialize pass over the batch and report totals.
---
---For every tree position in the `trees_x * trees_y * trees_z` grid this
---populates a tree from the voxel data, walks it to collect content IDs and
---serializes it. The per-tree results are discarded; only the summed blob
---length and the number of trees processed are returned.
---@param area VoxelArea
---@param data integer[]
---@param param2_data integer[]
---@param batch_minp vector position of the first tree; later trees are offset by multiples of `SIZE`
---@param trees_x integer
---@param trees_y integer
---@param trees_z integer
---@param SIZE integer spacing between consecutive trees along each axis
---@param ID integer node field index holding the content ID
---@param CHILDREN integer node field index holding the children table
---@param get_name fun(cid: integer): string
---@return integer blob_bytes total length of all serialized blobs
---@return integer chunk_count number of trees processed
function run_benchmark_once(area, data, param2_data, batch_minp, trees_x, trees_y, trees_z, SIZE, ID, CHILDREN, get_name)
    local total_bytes = 0
    local trees_done = 0

    for ix = 0, trees_x - 1 do
        for iy = 0, trees_y - 1 do
            for iz = 0, trees_z - 1 do
                local corner = {
                    x = batch_minp.x + ix * SIZE,
                    y = batch_minp.y + iy * SIZE,
                    z = batch_minp.z + iz * SIZE,
                }
                local tree = {
                    center = octchunk.snap_to_center(corner),
                    size = SIZE,
                }
                octchunk.populate_tree_from_area(tree, area, data, param2_data)

                -- The collected IDs are thrown away; the walk is performed only
                -- so that its cost is part of the measured pass.
                -- NOTE(review): build_batch_blobs_impl uses the stack-based
                -- collect_content_ids_iter for this step, so the benchmark
                -- times a different traversal than the real build — confirm
                -- whether that is intended.
                local names_by_cid = {}
                collect_ids_recursive(tree, names_by_cid, ID, CHILDREN, get_name)

                local blob = octchunk.serialize(tree)
                if blob then
                    total_bytes = total_bytes + #blob
                end
                trees_done = trees_done + 1
            end
        end
    end

    return total_bytes, trees_done
end



---Build the per-tree payloads for one batch; errors propagate to the caller.
---
---Copies node and param2 data out of `manip`, closes it, then visits the
---`trees_x * trees_y * trees_z` grid of tree positions starting at
---`batch_minp` (x outermost, z innermost). Each result entry is
---`{blob, uniform_cid, content_ids}`:
---  * `blob` - the serialized tree,
---  * `uniform_cid` - the root's content ID when the root has no children, else `nil`,
---  * `content_ids` - map of every content ID found in the tree to its name.
---@param manip userdata VoxelManip object
---@param emerged_pos1 vector
---@param emerged_pos2 vector
---@param batch_minp vector
---@param trees_x integer
---@param trees_y integer
---@param trees_z integer
---@return table<integer, {[1]: string, [2]: integer|nil, [3]: table<integer, string>}>
function build_batch_blobs_impl(manip, emerged_pos1, emerged_pos2, batch_minp, trees_x, trees_y, trees_z)
    -- Pull the voxel data out first so the manipulator can be closed right away.
    local node_ids, node_param2 = {}, {}
    manip:get_data(node_ids)
    manip:get_param2_data(node_param2)
    manip:close()
    local area = VoxelArea(emerged_pos1, emerged_pos2)

    -- Cache everything used inside the loops in locals.
    local CHILDREN, ID, SIZE = octchunk.CHILDREN, octchunk.ID, octchunk.SIZE
    local snap_to_center = octchunk.snap_to_center
    local serialize = octchunk.serialize
    local populate = octchunk.populate_tree_from_area
    local get_name = core.get_name_from_content_id

    local scratch_stack = {} -- shared by every content-ID walk in this batch
    local results = {}
    local produced = 0
    local corner = {x = 0, y = 0, z = 0} -- reused as input for snap_to_center

    for ix = 0, trees_x - 1 do
        for iy = 0, trees_y - 1 do
            for iz = 0, trees_z - 1 do
                corner.x = batch_minp.x + ix * SIZE
                corner.y = batch_minp.y + iy * SIZE
                corner.z = batch_minp.z + iz * SIZE

                local tree = {
                    center = snap_to_center(corner),
                    size = SIZE,
                }
                populate(tree, area, node_ids, node_param2)

                -- A root without children is reported with its single content ID.
                local uniform_cid
                if not tree[CHILDREN] then
                    uniform_cid = tree[ID]
                end

                local names_by_cid = {}
                collect_content_ids_iter(tree, names_by_cid, scratch_stack, CHILDREN, ID, get_name)

                produced = produced + 1
                results[produced] = {serialize(tree), uniform_cid, names_by_cid}
            end
        end
    end

    -- Release the raw voxel arrays before returning so the collection below
    -- can free them.
    node_ids, node_param2 = nil, nil
    collectgarbage("collect")
    return results
end



---Time repeated build+serialize passes over a batch; errors propagate.
---
---Copies node and param2 data out of `manip`, closes it, runs `warmup` untimed
---passes followed by `samples` timed passes, and forces a full garbage
---collection after every pass (outside the timed span).
---
---In the result, `samples_us` holds the pass durations sorted ascending,
---`p50_us`/`p95_us` are picked from that list, and `blob_bytes`/`chunks` are
---the totals reported by the last timed pass.
---@param manip userdata VoxelManip object
---@param emerged_pos1 vector
---@param emerged_pos2 vector
---@param batch_minp vector
---@param trees_x integer
---@param trees_y integer
---@param trees_z integer
---@param warmup integer|nil untimed passes; defaults to 1, floored, never below 0
---@param samples integer|nil timed passes; defaults to 3, floored, never below 1
---@return BatchBenchmarkResult
function benchmark_build_batch_impl(manip, emerged_pos1, emerged_pos2, batch_minp, trees_x, trees_y, trees_z, warmup, samples)
    local warmup_runs = math.max(0, math.floor(tonumber(warmup) or 1))
    local sample_runs = math.max(1, math.floor(tonumber(samples) or 3))

    -- Pull the voxel data out first so the manipulator can be closed right away.
    local node_ids, node_param2 = {}, {}
    manip:get_data(node_ids)
    manip:get_param2_data(node_param2)
    manip:close()

    local area = VoxelArea(emerged_pos1, emerged_pos2)
    local CHILDREN, ID, SIZE = octchunk.CHILDREN, octchunk.ID, octchunk.SIZE
    local get_name = core.get_name_from_content_id

    -- Untimed passes; their results are ignored.
    for _ = 1, warmup_runs do
        run_benchmark_once(area, node_ids, node_param2, batch_minp, trees_x, trees_y, trees_z, SIZE, ID, CHILDREN, get_name)
        collectgarbage("collect")
    end

    local durations_us = {}
    local total_bytes, tree_count = 0, 0
    for run = 1, sample_runs do
        local started = async_now_us()
        total_bytes, tree_count = run_benchmark_once(area, node_ids, node_param2, batch_minp, trees_x, trees_y, trees_z, SIZE, ID, CHILDREN, get_name)
        durations_us[run] = async_now_us() - started
        -- Collect after the clock was read so this cycle is not part of the sample.
        collectgarbage("collect")
    end

    -- quantile() expects ascending order; the sorted list is also what gets returned.
    table.sort(durations_us)
    local summary = {
        p50_us = quantile(durations_us, 0.50),
        p95_us = quantile(durations_us, 0.95),
        blob_bytes = total_bytes,
        samples_us = durations_us,
        chunks = tree_count,
    }

    collectgarbage("collect")
    return summary
end
