Skynet 热更新机制详解

全面讲解 Skynet 热更新机制,包括代码热更新、配置热更新、Lua 模块重加载、数据迁移和生产环境实践

热更新是游戏服务器开发中至关重要的能力,它允许在不停机的情况下更新代码和配置,保证服务的持续可用性。本教程将深入讲解 Skynet 中的热更新机制和最佳实践。

热更新的挑战

为什么需要热更新

  1. 服务连续性:游戏服务器需要 24/7 运行,不能频繁停机
  2. 快速响应:线上 bug 需要快速修复
  3. 运营需求:活动配置、数值配置需要频繁调整
  4. 版本迭代:小版本更新无需重启

热更新的难点

  1. 状态保持:更新代码时不能丢失内存中的状态
  2. 版本兼容:新旧代码可能同时存在
  3. 并发安全:更新过程中可能有并发请求
  4. 回滚机制:更新失败时需要能回滚

Lua 代码热更新

基础热更新原理

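Lua 的 require 默认会把已加载的模块缓存在 package.loaded 中,要实现热更新需要先清除对应的缓存项,再重新 require。注意:这只影响之后的 require 调用,已经保存在局部变量或 upvalue 中的旧模块引用不会自动更新,需要由持有方重新赋值: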

-- lualib/hotfix.lua
local skynet = require "skynet"

local HotFix = {}

--- Reload a single Lua module, bypassing the `require` cache.
--
-- The cached entry is dropped so that `require` runs the module loader again.
-- If the reload fails, the previously cached value is restored so later
-- `require` calls in this service keep getting the last working version
-- instead of re-running the broken file.
--
-- Only `package.loaded` is refreshed here; references to the old module that
-- callers already hold are not touched by this function.
--
-- @param module_name module name exactly as passed to `require`
-- @return true, module   on success
-- @return false, err     when `require` raised; `err` is the raised value
function HotFix.reload(module_name)
    -- Remember the current version so a failed reload can be rolled back.
    local previous = package.loaded[module_name]
    package.loaded[module_name] = nil

    local ok, mod = pcall(require, module_name)
    if not ok then
        package.loaded[module_name] = previous
        skynet.error("模块重载失败:", module_name, mod)
        return false, mod
    end

    return true, mod
end

--- Reload every cached module whose name matches a Lua pattern.
--
-- A module that fails to reload keeps its previously cached version and the
-- failure is logged; it is not included in the result.
--
-- @param pattern Lua pattern tested against each key of `package.loaded`
-- @return array of the module names that were reloaded successfully
function HotFix.reload_pattern(pattern)
    -- Snapshot the matching names first: `require` stores new keys in
    -- `package.loaded`, and assigning to absent keys while `pairs` is
    -- traversing the same table is undefined behaviour.
    local names = {}
    for name in pairs(package.loaded) do
        if type(name) == "string" and string.match(name, pattern) then
            names[#names + 1] = name
        end
    end

    local reloaded = {}
    for _, name in ipairs(names) do
        local previous = package.loaded[name]
        package.loaded[name] = nil

        local ok, err = pcall(require, name)
        if ok then
            reloaded[#reloaded + 1] = name
        else
            -- Roll back so the module does not end up unloaded.
            package.loaded[name] = previous
            skynet.error("模块重载失败:", name, err)
        end
    end
    return reloaded
end

--- Reload every cached module whose name starts with a given prefix.
--
-- The comparison is a plain string-prefix test on the module name (the key
-- in `package.loaded`), not a filesystem lookup. A module that fails to
-- reload keeps its previously cached version and the failure is logged.
--
-- @param dir prefix that module names must start with
-- @return array of the module names that were reloaded successfully
function HotFix.reload_dir(dir)
    -- Snapshot the matching names first: `require` stores new keys in
    -- `package.loaded`, and assigning to absent keys while `pairs` is
    -- traversing the same table is undefined behaviour.
    local names = {}
    for name in pairs(package.loaded) do
        if type(name) == "string" and string.sub(name, 1, #dir) == dir then
            names[#names + 1] = name
        end
    end

    local reloaded = {}
    for _, name in ipairs(names) do
        local previous = package.loaded[name]
        package.loaded[name] = nil

        local ok, err = pcall(require, name)
        if ok then
            reloaded[#reloaded + 1] = name
        else
            -- Roll back so the module does not end up unloaded.
            package.loaded[name] = previous
            skynet.error("模块重载失败:", name, err)
        end
    end
    return reloaded
end

return HotFix

更新服务代码

-- service/updater.lua
local skynet = require "skynet"
local HotFix = require "hotfix"

local CMD = {}

-- Render a service address for log output. Numeric handles are printed in
-- the ":%08x" form; anything else (e.g. an address that is already a string)
-- is printed as-is.
local function format_address(addr)
    if type(addr) == "number" then
        return string.format(":%08x", addr)
    end
    return tostring(addr)
end

-- Tell whether a launcher list entry describes an instance of `service_name`.
-- Accepts an exact match as well as a launch command line of the form
-- "snlua <service_name> [args...]".
-- NOTE(review): the stock skynet launcher reports the launch command line,
-- keyed by a ":%08x" address string - confirm against the launcher in use.
local function is_instance_of(entry, service_name)
    if entry == service_name then
        return true
    end
    if type(entry) ~= "string" then
        return false
    end
    local loader, name = entry:match("^(%S+)%s+(%S+)")
    return loader == "snlua" and name == service_name
end

--- Hot-update every running instance of a service.
--
-- Asks the launcher for its service list and sends "_hotfix" to each matching
-- instance. An instance counts as updated only when the call completed AND
-- the handler itself reported success (first return value truthy).
--
-- @param service_name name of the service script to update
-- @return number of instances that reported a successful update
function CMD.update_service(service_name)
    skynet.error("开始更新服务:", service_name)

    local launcher = skynet.localname(".launcher")
    local services = skynet.call(launcher, "lua", "list")

    local updated = 0
    for addr, entry in pairs(services) do
        if is_instance_of(entry, service_name) then
            -- `called` is the pcall status; `success` / `err` are what the
            -- remote "_hotfix" handler returned (or, when the call raised,
            -- `success` holds the raised error).
            local called, success, err = pcall(skynet.call, addr, "lua", "_hotfix")
            local label = format_address(addr)
            if called and success then
                updated = updated + 1
                skynet.error(string.format("服务 %s 更新成功", label))
            else
                local reason
                if called then
                    reason = err
                else
                    reason = success
                end
                skynet.error(string.format("服务 %s 更新失败: %s", label, tostring(reason)))
            end
        end
    end

    return updated
end

--- Ask every service in the launcher's list to reload one Lua module.
--
-- Each service receives a "_reload_module" request. Errors raised by a call
-- are captured per service rather than aborting the broadcast.
--
-- @param module_name module name forwarded to each service
-- @return table mapping each listed address to `{ok = <pcall status>,
--         result = <first reply value, or the error when the call raised>}`
function CMD.update_module(module_name)
    local service_list = skynet.call(skynet.localname(".launcher"), "lua", "list")

    local outcome = {}
    for address in pairs(service_list) do
        local succeeded, reply = pcall(skynet.call, address, "lua", "_reload_module", module_name)
        outcome[address] = { ok = succeeded, result = reply }
    end
    return outcome
end

--- Load a Lua script from disk and run it inside this service.
--
-- SECURITY NOTE(review): the script runs with this service's full
-- privileges and the path comes straight from the caller. Anyone able to
-- send this command can execute arbitrary code, so access must be restricted
-- by whatever sits in front of this service.
--
-- @param script_path path of the Lua file to execute
-- @return true, result   on success (`result` is the chunk's first return value)
-- @return false, message when the file cannot be read, compiled or executed
function CMD.run_script(script_path)
    local file = io.open(script_path, "r")
    if not file then
        return false, "无法打开脚本文件"
    end

    local script, read_err = file:read("*a")
    file:close()
    if not script then
        return false, "无法读取脚本文件: " .. tostring(read_err)
    end

    -- The "@" prefix marks the chunk name as a file name, so compile and
    -- runtime errors are reported as "path:line:" instead of [string "..."].
    local chunk, err = load(script, "@" .. script_path)
    if not chunk then
        return false, "脚本编译失败: " .. tostring(err)
    end

    local ok, result = pcall(chunk)
    if not ok then
        -- The raised value is not necessarily a string; tostring keeps the
        -- error report itself from raising.
        return false, "脚本执行失败: " .. tostring(result)
    end

    return true, result
end

-- Service entry point: registers the local name ".updater" and routes "lua"
-- messages to the matching CMD handler.
skynet.start(function()
    -- skynet.register is added to the skynet table by the skynet.manager
    -- module, so it has to be required in this service before use.
    require "skynet.manager"
    skynet.register(".updater")

    skynet.dispatch("lua", function(session, source, cmd, ...)
        local f = assert(CMD[cmd], "unknown command: " .. tostring(cmd))
        if session ~= 0 then
            -- A non-zero session means the sender is waiting for a reply.
            skynet.retpack(f(...))
        else
            f(...)
        end
    end)
end)

服务内置热更新支持

每个业务服务可以内置热更新接口:

-- 业务服务模板(包含热更新支持)
local skynet = require "skynet"
local HotFix = require "hotfix"

-- 业务逻辑模块
local business = require "business_logic"

local CMD = {}

-- Business command: hands the request payload to the business module and
-- returns whatever it returns. The module is read through the `business`
-- upvalue on every call, so rebinding that upvalue affects the next request.
function CMD.process(data)
    local handle = business.process
    return handle(data)
end

-- ===== 热更新接口 =====

--- Reload one module in this service.
--
-- When the reloaded module is "business_logic", the local `business`
-- reference is rebound to the new version. No state is migrated here.
--
-- @param module_name module name as passed to `require`
-- @return true         on success
-- @return false, err   on failure, so the caller can see why it failed
function CMD._reload_module(module_name)
    local ok, mod = HotFix.reload(module_name)
    if not ok then
        return false, mod
    end

    if module_name == "business_logic" then
        business = mod  -- rebind so later commands use the new code
    end
    return true
end

--- Full hot update of the business module, with state migration.
--
-- Steps: snapshot state from the running module, reload the module, push the
-- state into the new version, then switch the `business` reference. If any
-- step after the snapshot fails, `package.loaded` is pointed back at the old
-- module and `business` is left untouched, so the service keeps running the
-- previous version.
--
-- @return true         on success
-- @return false, err   when reloading or state migration failed
function CMD._hotfix()
    local MODULE = "business_logic"
    local old_business = business

    -- 1. Snapshot the current state (optional hook on the module).
    local state = old_business.get_state and old_business.get_state()

    -- Restore the require cache to the version this service is still using.
    local function rollback(err)
        package.loaded[MODULE] = old_business
        return false, err
    end

    -- 2. Reload the business module.
    local ok, new_business = HotFix.reload(MODULE)
    if not ok then
        return rollback(new_business)
    end
    if type(new_business) ~= "table" then
        return rollback(MODULE .. " did not return a module table")
    end

    -- 3. Migrate state into the new module; a failing set_state must not
    --    leave the service half-switched.
    if state and new_business.set_state then
        local migrated, err = pcall(new_business.set_state, state)
        if not migrated then
            return rollback(err)
        end
    end

    -- 4. Switch over only after everything above succeeded.
    business = new_business

    return true
end

-- Service entry point: routes every "lua" message to the CMD handler named
-- by its first argument. Unknown commands fail the assert.
skynet.start(function()
    local function on_lua_message(session, source, cmd, ...)
        local handler = assert(CMD[cmd])
        if session == 0 then
            -- Session 0: run the handler without sending a reply.
            handler(...)
            return
        end
        skynet.retpack(handler(...))
    end

    skynet.dispatch("lua", on_lua_message)
end)

配置热更新

配置管理服务

-- service/config_mgr.lua
local skynet = require "skynet"
local cjson = require "cjson"

local configs = {}      -- name -> config_data
local config_files = {} -- name -> file_path
local watchers = {}     -- name -> {service, cmd}
local file_mtimes = {}  -- file -> mtime

local CMD = {}

-- Wrap a string in single quotes for the shell, escaping embedded single
-- quotes, so it is passed to the command as one literal argument.
local function shell_quote(text)
    return "'" .. (string.gsub(text, "'", "'\\''")) .. "'"
end

-- Read a file's modification time by shelling out to `stat` (GNU form
-- first, BSD form as fallback). Returns nil when it cannot be determined.
local function read_mtime(path)
    local quoted = shell_quote(path)
    local pipe = io.popen(string.format(
        "stat -c %%Y %s 2>/dev/null || stat -f %%m %s", quoted, quoted))
    if not pipe then
        return nil
    end
    local mtime = tonumber(pipe:read("*l"))
    pipe:close()
    return mtime
end

--- Load a JSON config file and remember where it came from.
--
-- On success the decoded data is stored under `name`, the file path is
-- recorded for later reloads, and the file's modification time is recorded
-- for change detection.
--
-- @param name      key under which the config is stored
-- @param file_path path of the JSON file
-- @return true             on success
-- @return false, message   when the file is missing or is not valid JSON
function CMD.load(name, file_path)
    local file = io.open(file_path, "r")
    if not file then
        return false, "文件不存在: " .. file_path
    end

    local content = file:read("*a")
    file:close()

    local ok, data = pcall(cjson.decode, content)
    if not ok then
        return false, "JSON 解析失败: " .. tostring(data)
    end

    configs[name] = data
    config_files[name] = file_path

    -- Record the modification time; nil when `stat` could not be run.
    file_mtimes[file_path] = read_mtime(file_path)

    skynet.error(string.format("配置已加载: %s (%s)", name, file_path))
    return true
end

-- Return the whole config stored under `name`, or nil if none is stored.
function CMD.get(name)
    local stored = configs[name]
    return stored
end

-- Return one entry of the config stored under `name`; nil when that config
-- is not stored.
function CMD.get_item(name, key)
    local section = configs[name]
    if not section then
        return nil
    end
    return section[key]
end

-- Change one entry of a config at runtime (in memory only), creating the
-- config table if it is not stored yet, then notify its subscribers.
-- Always returns true.
function CMD.update(name, key, value)
    local section = configs[name]
    if not section then
        section = {}
        configs[name] = section
    end
    section[key] = value

    notify_watchers(name)
    return true
end

-- Subscribe a service to changes of the config `name`. The pair
-- (service, cmd) is appended to that config's subscriber list; `cmd` is the
-- command sent to `service` on each notification. Returns nothing.
function CMD.watch(name, service, cmd)
    local subscribers = watchers[name]
    if not subscribers then
        subscribers = {}
        watchers[name] = subscribers
    end
    subscribers[#subscribers + 1] = { service = service, cmd = cmd }
end

-- Reload every config that has a recorded file path. Subscribers of each
-- successfully reloaded config are notified; failures are logged and skipped.
-- Returns an array with the names that were reloaded.
function CMD.reload_all()
    local done = {}
    for name, path in pairs(config_files) do
        local loaded, reason = CMD.load(name, path)
        if not loaded then
            skynet.error(string.format("重载失败 %s: %s", name, reason))
        else
            table.insert(done, name)
            notify_watchers(name)
        end
    end
    return done
end

-- file path -> modification time of the last reload failure already logged.
-- Keeps the periodic check from logging the same broken file on every pass.
local reported_failures = {}

-- Wrap a path in single quotes for the shell, escaping embedded single
-- quotes, so it reaches `stat` as one literal argument.
local function quote_shell_arg(text)
    return "'" .. (string.gsub(text, "'", "'\\''")) .. "'"
end

-- Current modification time of `path` via `stat` (GNU form first, BSD form
-- as fallback), or nil when it cannot be determined.
local function probe_mtime(path)
    local quoted = quote_shell_arg(path)
    local pipe = io.popen(string.format(
        "stat -c %%Y %s 2>/dev/null || stat -f %%m %s", quoted, quoted))
    if not pipe then
        return nil
    end
    local mtime = tonumber(pipe:read("*l"))
    pipe:close()
    return mtime
end

--- Reload every config whose file modification time has changed.
--
-- A config is reloaded when its file's current mtime differs from the
-- recorded one. On success its subscribers are notified. On failure the
-- recorded mtime is left alone, so the reload is retried on the next check,
-- and the error is logged once per distinct mtime.
--
-- @return array of the config names that were reloaded
function CMD.check_update()
    local updated = {}
    for name, file_path in pairs(config_files) do
        local mtime = probe_mtime(file_path)

        if mtime and mtime ~= file_mtimes[file_path] then
            local ok, err = CMD.load(name, file_path)
            if ok then
                updated[#updated + 1] = name
                notify_watchers(name)
                file_mtimes[file_path] = mtime
                reported_failures[file_path] = nil
                skynet.error("配置已更新:", name)
            elseif reported_failures[file_path] ~= mtime then
                reported_failures[file_path] = mtime
                skynet.error(string.format("重载失败 %s: %s", name, tostring(err)))
            end
        end
    end
    return updated
end

--- Write the in-memory config `name` back to the file it was loaded from.
--
-- The data is encoded BEFORE the file is opened: opening with "w" truncates
-- the file, so an encoding error after that point would wipe the existing
-- config and leave the handle open.
--
-- @param name config name
-- @return true             on success
-- @return false, message   when the config is unknown, cannot be encoded,
--                          or the file cannot be written
function CMD.save(name)
    local file_path = config_files[name]
    local data = configs[name]
    if not file_path or not data then
        return false, "配置不存在"
    end

    local encoded_ok, encoded = pcall(cjson.encode, data)
    if not encoded_ok then
        return false, "JSON 编码失败: " .. tostring(encoded)
    end

    local file = io.open(file_path, "w")
    if not file then
        return false, "无法写入文件"
    end

    local written, write_err = file:write(encoded)
    file:close()
    if not written then
        return false, "无法写入文件: " .. tostring(write_err)
    end
    return true
end

--- Send the current value of config `name` to every subscriber.
--
-- Each subscriber gets a one-way "lua" message `cmd, name, config`. A failed
-- send is logged and does not stop delivery to the remaining subscribers.
--
-- Deliberately left as a global function: the handlers defined earlier in
-- this file reference it by its global name, so turning it into a local at
-- this position would leave those references unresolved.
function notify_watchers(name)
    local list = watchers[name]
    if not list then return end

    for _, watcher in ipairs(list) do
        local ok, err = pcall(skynet.send, watcher.service, "lua", watcher.cmd, name, configs[name])
        if not ok then
            skynet.error(string.format("通知订阅者失败 %s -> %s: %s",
                name, tostring(watcher.service), tostring(err)))
        end
    end
end

-- Service entry point: registers the local name ".config_mgr", starts the
-- periodic file-change check and routes "lua" messages to CMD handlers.
skynet.start(function()
    -- skynet.register is added to the skynet table by the skynet.manager
    -- module, so it has to be required in this service before use.
    require "skynet.manager"
    skynet.register(".config_mgr")

    -- Poll the config files for changes in a background coroutine.
    skynet.fork(function()
        while true do
            skynet.sleep(100)  -- 100 ticks; the original comment calls this "every second"
            local ok, err = pcall(CMD.check_update)
            if not ok then
                -- Keep the loop alive, but do not hide the failure.
                skynet.error("配置检查失败:", tostring(err))
            end
        end
    end)

    skynet.dispatch("lua", function(session, source, cmd, ...)
        local f = assert(CMD[cmd], "unknown command: " .. tostring(cmd))
        if session ~= 0 then
            -- A non-zero session means the sender is waiting for a reply.
            skynet.retpack(f(...))
        else
            f(...)
        end
    end)
end)

数值配置热更新示例

游戏数值配置

文件 config/game_values.json 的内容如下(注意:JSON 不支持注释,文件内容从下一行的 { 开始):
{
    "max_level": 100,
    "exp_base": 100,
    "exp_growth": 1.5,
    "gold_per_kill": 10,
    "hp_per_level": 50,
    "mp_per_level": 20,
    "drop_rates": {
        "common": 0.7,
        "rare": 0.2,
        "epic": 0.08,
        "legendary": 0.02
    },
    "shop_items": {
        "potion_hp": {"price": 100, "effect": 50},
        "potion_mp": {"price": 150, "effect": 30}
    }
}

使用数值配置

-- service/game_logic.lua
local skynet = require "skynet"

local game_config = nil
local config_mgr

local CMD = {}

--- Fetch the game value config and subscribe to its changes.
--
-- Raises an error when the config cannot be loaded: every calculation in
-- this service reads `game_config`, so starting without it would only defer
-- the failure to the first request.
function CMD.init()
    config_mgr = skynet.uniqueservice("config_mgr")

    -- "load" answers `true` on success or `false, message` on failure.
    local ok, err = skynet.call(config_mgr, "lua", "load", "game_values", "./config/game_values.json")
    if not ok then
        error("game_values 配置加载失败: " .. tostring(err))
    end
    game_config = skynet.call(config_mgr, "lua", "get", "game_values")

    -- Ask to receive "_on_config_changed" whenever the config changes.
    local self = skynet.self()
    skynet.call(config_mgr, "lua", "watch", "game_values", self, "_on_config_changed")
end

-- Change notification handler: logs the update and replaces the cached
-- config with the table carried in the message. `name` is accepted but not
-- used. Follow-up processing after a config change can be added here.
function CMD._on_config_changed(name, new_config)
    local notice = "游戏数值配置已更新"
    skynet.error(notice)
    game_config = new_config
end

-- Experience required for `level`:
-- floor(exp_base * exp_growth ^ (level - 1)), using the current config.
function CMD.get_exp_required(level)
    local base = game_config.exp_base
    local multiplier = game_config.exp_growth ^ (level - 1)
    return math.floor(base * multiplier)
end

-- Stats for `level`: fixed base values (100 HP, 50 MP) plus the configured
-- per-level gain for each level above 1. Returns `{max_hp, max_mp}` fields.
function CMD.calc_stats(level)
    local levels_gained = level - 1
    local stats = {}
    stats.max_hp = 100 + levels_gained * game_config.hp_per_level
    stats.max_mp = 50 + levels_gained * game_config.mp_per_level
    return stats
end

-- Roll a drop rarity. One random number in [0, 1) is compared against the
-- cumulative configured rates of legendary, epic and rare, in that order;
-- anything above the last cutoff is "common". The configured "common" rate
-- is not read.
function CMD.roll_drop()
    local roll = math.random()
    local rates = game_config.drop_rates

    local cutoff
    for _, tier in ipairs({ "legendary", "epic", "rare" }) do
        -- Accumulate left to right, one tier per iteration.
        if cutoff then
            cutoff = cutoff + rates[tier]
        else
            cutoff = rates[tier]
        end
        if roll < cutoff then
            return tier
        end
    end
    return "common"
end

-- Service entry point: runs CMD.init, then routes every "lua" message to
-- the CMD handler named by its first argument.
skynet.start(function()
    CMD.init()

    local function on_lua_message(session, source, cmd, ...)
        local handler = assert(CMD[cmd])
        if session == 0 then
            -- Session 0: run the handler without sending a reply.
            handler(...)
            return
        end
        skynet.retpack(handler(...))
    end

    skynet.dispatch("lua", on_lua_message)
end)

代码版本管理

版本控制服务

-- service/version_mgr.lua
local skynet = require "skynet"
local cjson = require "cjson"

local VERSION_FILE = "./config/version.json"

local versions = {
    server = "1.0.0",
    config = "1.0.0",
    protocol = "1.0.0",
}

local update_history = {}

local CMD = {}

-- Return the version of one component, or the whole version table when no
-- component is given.
function CMD.get_version(component)
    if not component then
        return versions
    end
    return versions[component]
end

-- Set the version of a component, append the change (old and new version
-- plus a timestamp) to the update history, persist the version table and
-- log the transition. Always returns true.
function CMD.set_version(component, version)
    local previous = versions[component]
    versions[component] = version

    local entry = {
        component = component,
        old_version = previous,
        new_version = version,
        timestamp = os.time(),
    }
    update_history[#update_history + 1] = entry

    save_versions()

    local message = string.format("版本更新: %s %s -> %s", component, previous, version)
    skynet.error(message)
    return true
end

-- Return the list of recorded version changes (the live table, not a copy).
function CMD.get_history()
    local history = update_history
    return history
end

--- Persist the version table to VERSION_FILE as JSON.
--
-- Failures are logged instead of being silently ignored. The table is
-- encoded before the file is opened, because opening with "w" truncates it
-- and an encoding error after that point would destroy the saved versions.
--
-- Deliberately left as a global function: CMD.set_version, defined earlier
-- in this file, references it by its global name.
function save_versions()
    local ok, encoded = pcall(cjson.encode, versions)
    if not ok then
        skynet.error(string.format("版本信息编码失败: %s", tostring(encoded)))
        return
    end

    local file, err = io.open(VERSION_FILE, "w")
    if not file then
        skynet.error(string.format("无法写入版本文件 %s: %s", VERSION_FILE, tostring(err)))
        return
    end
    file:write(encoded)
    file:close()
end

--- Load the version table from VERSION_FILE, if the file exists.
--
-- The built-in defaults stay in place when the file is missing, is not valid
-- JSON, or does not decode to a table. The last check matters because the
-- command handlers index `versions` directly. Problems with an existing file
-- are logged.
--
-- Deliberately left as a global function, matching how it is declared and
-- called in the rest of this file.
function load_versions()
    local file = io.open(VERSION_FILE, "r")
    if not file then
        return  -- nothing saved yet: keep the defaults
    end

    local content = file:read("*a")
    file:close()

    local ok, data = pcall(cjson.decode, content)
    if not ok then
        skynet.error(string.format("版本文件解析失败 %s: %s", VERSION_FILE, tostring(data)))
        return
    end
    if type(data) ~= "table" then
        skynet.error(string.format("版本文件格式错误 %s", VERSION_FILE))
        return
    end

    versions = data
end

-- Service entry point: restores saved versions, registers the local name
-- ".version_mgr" and routes "lua" messages to CMD handlers.
skynet.start(function()
    load_versions()

    -- skynet.register is added to the skynet table by the skynet.manager
    -- module, so it has to be required in this service before use.
    require "skynet.manager"
    skynet.register(".version_mgr")

    skynet.dispatch("lua", function(session, source, cmd, ...)
        local f = assert(CMD[cmd], "unknown command: " .. tostring(cmd))
        if session ~= 0 then
            -- A non-zero session means the sender is waiting for a reply.
            skynet.retpack(f(...))
        else
            f(...)
        end
    end)
end)

完整的更新流程

更新脚本

#!/bin/bash
# scripts/hotfix.sh - Skynet hot-update helper
#
# Sends a single update command to a node's debug console.
#
# Usage: hotfix.sh <node> <type> <target>
#   node   - node name; the console port is read from the file config.<node>
#   type   - service | module | config | script
#   target - service name, module name, config name or script path

set -e

NODE=$1
UPDATE_TYPE=$2
TARGET=$3

if [ -z "$NODE" ] || [ -z "$UPDATE_TYPE" ] || [ -z "$TARGET" ]; then
    echo "Usage: $0 <node> <type> <target>"
    echo "  type: service | module | config | script"
    echo ""
    echo "Examples:"
    echo "  $0 node1 service game_logic"
    echo "  $0 node1 module business"
    echo "  $0 node1 config game_values"
    echo "  $0 node1 script scripts/migrate_v2.lua"
    exit 1
fi

# TARGET is embedded in a quoted Lua string sent to the console, so only
# accept characters that cannot end the string or inject code.
case $TARGET in
    *[!A-Za-z0-9_./-]*)
        echo "非法的目标名称: $TARGET"
        exit 1
        ;;
esac

CONFIG_FILE="config.$NODE"
if [ ! -f "$CONFIG_FILE" ]; then
    echo "找不到节点配置文件: $CONFIG_FILE"
    exit 1
fi

# Read the debug console port from the node config (first matching line).
CONSOLE_PORT=$(grep console "$CONFIG_FILE" | head -n 1 | awk -F: '{print $2}' | tr -d ' "')

# Stop here instead of handing nc an empty or malformed port.
case $CONSOLE_PORT in
    ''|*[!0-9]*)
        echo "无法从 $CONFIG_FILE 解析调试控制台端口"
        exit 1
        ;;
esac

echo "连接到节点 $NODE (端口 $CONSOLE_PORT)..."

# Send one command line to the debug console.
send_console() {
    echo "$1" | nc localhost "$CONSOLE_PORT"
}

# The console's `call` command evaluates everything after the address as a
# list of Lua expressions, so the command name and its arguments are sent as
# quoted Lua strings.
# NOTE(review): confirm against the debug_console version deployed.
case $UPDATE_TYPE in
    service)
        echo "更新服务: $TARGET"
        send_console "call .updater \"update_service\",\"$TARGET\""
        ;;
    module)
        echo "更新模块: $TARGET"
        send_console "call .updater \"update_module\",\"$TARGET\""
        ;;
    config)
        echo "更新配置: $TARGET"
        send_console "call .config_mgr \"load\",\"$TARGET\",\"./config/$TARGET.json\""
        ;;
    script)
        echo "执行脚本: $TARGET"
        send_console "call .updater \"run_script\",\"$TARGET\""
        ;;
    *)
        echo "未知更新类型: $UPDATE_TYPE"
        exit 1
        ;;
esac

echo "更新完成"

更新流程示例

更新流程:

1. 准备阶段
   - 备份当前代码和配置
   - 准备更新内容
   - 准备数据迁移脚本

2. 执行阶段
   - 上传新代码到服务器
   - 执行热更新脚本
   - 验证更新结果

3. 验证阶段
   - 检查日志是否有报错
   - 验证功能是否正常
   - 监控系统指标

4. 回滚(如果需要)
   - 恢复备份的代码
   - 重新加载
   - 验证回滚结果

数据迁移

迁移脚本示例

-- scripts/migrate_user_v2.lua
-- 用户数据从 v1 迁移到 v2

local skynet = require "skynet"

-- Migration entry point: adds the v2 columns to `users`, then derives
-- `vip_level` from `total_recharge` for every row. Both statements are sent
-- to the "mysql_mgr" service through its "query" command.
skynet.start(function()
    local db = skynet.uniqueservice("mysql_mgr")

    -- Schema change for v2: two new columns on `users`.
    local add_columns_sql = [[
        ALTER TABLE users
        ADD COLUMN vip_level INT DEFAULT 0,
        ADD COLUMN last_login TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    ]]

    -- Backfill: map the recharge total onto VIP tiers 0-5.
    local backfill_vip_sql = [[
        UPDATE users
        SET vip_level = CASE
            WHEN total_recharge >= 10000 THEN 5
            WHEN total_recharge >= 5000 THEN 4
            WHEN total_recharge >= 1000 THEN 3
            WHEN total_recharge >= 100 THEN 2
            WHEN total_recharge >= 10 THEN 1
            ELSE 0
        END
    ]]

    skynet.call(db, "lua", "query", add_columns_sql)
    skynet.call(db, "lua", "query", backfill_vip_sql)

    skynet.error("用户数据迁移完成")
end)

安全热更新

更新权限验证

-- lualib/update_auth.lua
local skynet = require "skynet"
local crypt = require "skynet.crypt"

local AUTH_KEY = "your-secret-key"

local UpdateAuth = {}

--- Create a signed update token for an operator.
--
-- Token layout: "<operator>:<unix timestamp>:<base64 HMAC-SHA1 signature>",
-- where the signature covers "<operator>:<timestamp>".
--
-- The operator name must not contain ':' because that character separates
-- the token fields; such a token could never be parsed back and verified.
--
-- @param operator operator name (a number is accepted and converted)
-- @return the token string
-- Raises an error when `operator` is empty, contains ':' or is not a
-- string or number.
function UpdateAuth.generate_token(operator)
    if type(operator) == "number" then
        operator = tostring(operator)
    end
    if type(operator) ~= "string" or operator == "" or operator:find(":", 1, true) then
        error("operator 必须是不含 ':' 的非空字符串", 2)
    end

    local timestamp = os.time()
    local data = operator .. ":" .. timestamp
    local signature = crypt.hmac_sha1(AUTH_KEY, data)
    return string.format("%s:%d:%s", operator, timestamp, crypt.base64encode(signature))
end

-- Token lifetime in seconds (5 minutes).
local TOKEN_TTL = 300

-- Compare two signature strings by looking at every byte instead of stopping
-- at the first difference, so the time taken does not depend on how long the
-- matching prefix is.
local function same_signature(a, b)
    if #a ~= #b then
        return false
    end
    local mismatches = 0
    for i = 1, #a do
        if string.byte(a, i) ~= string.byte(b, i) then
            mismatches = mismatches + 1
        end
    end
    return mismatches == 0
end

--- Verify an update token.
--
-- Checks, in order: the token layout (the whole string must be
-- "<operator>:<digits>:<signature>"), the timestamp (not older than
-- TOKEN_TTL and not further than TOKEN_TTL in the future) and the HMAC
-- signature.
--
-- @param token token string to verify
-- @return true, operator   when the token is valid
-- @return false, message   otherwise
function UpdateAuth.verify_token(token)
    if type(token) ~= "string" then
        return false, "令牌格式错误"
    end

    -- Anchored on both ends so stray text around the token is rejected.
    local operator, timestamp, signature = string.match(token, "^([^:]+):(%d+):(.+)$")
    if not operator then
        return false, "令牌格式错误"
    end

    local age = os.time() - tonumber(timestamp)
    if age > TOKEN_TTL then
        return false, "令牌已过期"
    end
    if age < -TOKEN_TTL then
        -- A timestamp far in the future would otherwise never expire.
        return false, "令牌时间戳无效"
    end

    local data = operator .. ":" .. timestamp
    local expected = crypt.base64encode(crypt.hmac_sha1(AUTH_KEY, data))
    if not same_signature(signature, expected) then
        return false, "签名验证失败"
    end

    return true, operator
end

return UpdateAuth

总结

本教程详细介绍了 Skynet 的热更新机制:

  1. 代码热更新:模块重加载、服务代码更新
  2. 配置热更新:配置文件监控、动态更新
  3. 数据迁移:版本升级时的数据转换
  4. 安全机制:权限验证、回滚机制
  5. 运维工具:更新脚本、版本管理

参考资料

  1. Skynet 热更新讨论:https://github.com/cloudwu/skynet/issues
  2. Lua 模块加载机制
  3. 游戏服务器热更新最佳实践

继续阅读

探索更多技术文章

浏览归档,发现更多关于系统设计、工具链和工程实践的内容。

全部文章 返回首页