Android系统中固件崩溃后使用uevent机制重新加载固件 流程分析
【前言】
    Android系统为了保证各商业公司的利益,允许在系统中使用不开源的固件。因此我们可以看到比如用于音频处理的、用于键盘支持的等等大量固件。既然这么多模块要用到固件,那么我们也有必要来了解一下固件在崩溃后的自恢复过程。这篇博文是基于 Intel x86 平台写的,所分析的固件是 Intel 平台的一款音频 DSP 固件,使用 uevent 机制来收发固件恢复消息。所以如果读者使用的是其它平台或其它固件,在内容上也许会有所出入,但这不影响我们的分析思路。
【填充并发送uevent消息】
    在设备检测到固件崩溃之后,会调用恢复函数。在我使用的平台下,这对应的是 sst_do_recovery() 函数。在这个函数中,主要完成 填充uevent消息、dump固件崩溃信息、重置Intel平台音频配置、清除内存中旧的固件内容、发送uevent消息 这5件事。代码如下:
/*
 * sst_do_recovery - report an SST firmware crash and reset driver state.
 * @sst: driver context whose state, dump buffers and device are used.
 *
 * In order, this function:
 *   1. builds and emits a KOBJ_CHANGE uevent with EVENT_TYPE=SST_CRASHED
 *      plus the IRAM/DRAM (and, if present, DDR) dump sizes;
 *   2. sets sst_state to SST_RECOVERY and dumps diagnostic state;
 *   3. issues the SCU reset IPC, zeroes pvt_id and frees the cached
 *      firmware copy;
 *   4. sets sst_state to SST_RESET, calls sst_stream_recovery() and emits
 *      a second KOBJ_CHANGE uevent with EVENT_TYPE=SST_RECOVERY.
 *
 * NOTE(review): the function mixes the @sst parameter with the global
 * sst_drv_ctx; presumably both refer to the same context - confirm.
 */
void sst_do_recovery(struct intel_sst_drv *sst) {
	char iram_event[IRAM_EVENT_SIZE_MAX], dram_event[DRAM_EVENT_SIZE_MAX];
	char ddr_imr_event[DDR_EVENT_SIZE_MAX], event_type[EVENT_TYPE_SIZE_MAX];
	char *envp[NUM_EVENT_MAX];
	int env_offset = 0;

	pr_err("Audio: Intel SST engine encountered an unrecoverable error\n");

	/* Fill in the uevent environment: event type first. */
	snprintf(event_type, sizeof(event_type), "EVENT_TYPE=SST_CRASHED");
	envp[env_offset++] = event_type;

	/* Fill in the uevent environment: IRAM dump size. */
	snprintf(iram_event, sizeof(iram_event), "IRAM_DUMP_SIZE=%d",
	sst->dump_buf.iram_buf.size);
	envp[env_offset++] = iram_event;

	/* Fill in the uevent environment: DRAM dump size. */
	snprintf(dram_event, sizeof(dram_event), "DRAM_DUMP_SIZE=%d",
	sst->dump_buf.dram_buf.size);
	envp[env_offset++] = dram_event;

	/* The DDR entry is only added when a DDR region is present. */
	if (sst->ddr != NULL) {
		snprintf(ddr_imr_event, sizeof(ddr_imr_event),"DDR_IMR_DUMP_SIZE=%d DDR_IMR_ADDRESS=%p", (sst->ddr_end - sst->ddr_base), sst->ddr);
		envp[env_offset++] = ddr_imr_event;
	}
	/* The environment array passed to kobject_uevent_env() is NULL-terminated. */
	envp[env_offset] = NULL;

	/* Send the uevent that reports the firmware crash to user space. */
	kobject_uevent_env(&sst->dev->kobj, KOBJ_CHANGE, envp);
	pr_err("SST Crash Uevent Sent!!\n");

	/*
	 * Setting firmware state as RESET so that the firmware will get
	 * redownloaded on next request. This is because firmware not responding
	 * for 1 sec is equivalent to some unrecoverable error of FW.
	 *
	 * NOTE(review): the state written here is SST_RECOVERY; SST_RESET is
	 * only written further down, just before sst_stream_recovery().
	 */
	pr_err("Audio: trying to reset the dsp now\n");
	mutex_lock(&sst->sst_lock);
	sst->sst_state = SST_RECOVERY; /* mark the driver as "recovering" */
	mutex_unlock(&sst->sst_lock);

	dump_stack(); /* diagnostic dump */
	dump_sst_shim(sst); /* diagnostic dump */

	mutex_lock(&sst->sst_lock);
	sst_stall_lpe_n_wait(sst);
	mutex_unlock(&sst->sst_lock);

	/* dump mailbox and sram */
	pr_debug("Audio: Dumping Mailbox IA to LPE...\n");
	dump_buffer_fromio(sst->ipc_mailbox, NUM_DWORDS); /* diagnostic dump */
	pr_debug("Audio: Dumping Mailbox LPE to IA...\n");
	/* Same mailbox, offset by mailbox_recv_offset. */
	dump_buffer_fromio((sst->ipc_mailbox + sst->mailbox_recv_offset),
	NUM_DWORDS);
	pr_debug("Audio: Dumping SRAM CHECKPOINT...\n");
	dump_buffer_fromio((sst->mailbox +sst->pdata->debugfs_data->checkpoint_offset),DUMP_SRAM_CHECKPOINT_DWORDS);

	/* set_bypass is an optional op; the RAM dumps run only when it exists. */
	if (sst_drv_ctx->ops->set_bypass) {
		mutex_lock(&sst->sst_lock);
		sst_drv_ctx->ops->set_bypass(true);
		dump_ram_area(sst, &(sst->dump_buf), SST_IRAM); /* diagnostic dump */
		dump_ram_area(sst, &(sst->dump_buf), SST_DRAM); /* diagnostic dump */
		sst_drv_ctx->ops->set_bypass(false);
		mutex_unlock(&sst->sst_lock);
	}

	/* Send IPC to SCU to power gate and reset the LPE */
	sst_send_scu_reset_ipc(sst);

	/* pvt_id is logged before taking block_lock, then zeroed under it. */
	pr_err("reset the pvt id from val %d\n", sst_drv_ctx->pvt_id);
	spin_lock(&sst_drv_ctx->block_lock);
	sst_drv_ctx->pvt_id = 0;
	spin_unlock(&sst_drv_ctx->block_lock);

	sst_dump_ipc_dispatch_lists(sst_drv_ctx); /* diagnostic dump */
	sst_dump_rx_lists(sst_drv_ctx); /* diagnostic dump */

	/* Drop the stale in-memory firmware image and its scatter-gather lists. */
	if (sst_drv_ctx->fw_in_mem) {
		pr_err("Clearing the cached FW copy...\n");
		kfree(sst_drv_ctx->fw_in_mem);
		sst_drv_ctx->fw_in_mem = NULL;
		sst_memcpy_free_resources();
		/* NOTE(review): src/dst are freed but not set to NULL here. */
		kfree(sst_drv_ctx->fw_sg_list.src);
		kfree(sst_drv_ctx->fw_sg_list.dst);
		sst_drv_ctx->fw_sg_list.list_len = 0;
	}

	mutex_lock(&sst->sst_lock);
	sst->sst_state = SST_RESET; /* mark the driver as "reset" */
	sst_stream_recovery(sst); /* stream recovery runs under sst_lock */
	mutex_unlock(&sst->sst_lock);

	/*
	 * Delay is to ensure that the stream is closed before
	 * powering on DAPM widget
	 */
	usleep_range(STREAM_CLOSE_DELAY_MIN, STREAM_CLOSE_DELAY_MAX);

	/* Reuse envp for the second uevent, which carries only the event type. */
	env_offset = 0;
	snprintf(event_type, sizeof(event_type), "EVENT_TYPE=SST_RECOVERY");
	envp[env_offset++] = event_type;
	envp[env_offset] = NULL;

	/* Send the uevent that tells user space recovery has started. */
	kobject_uevent_env(&sst->dev->kobj, KOBJ_CHANGE, envp);
	pr_err("SST Recovery Uevent Sent!!\n");
}
【接收并处理uevent消息】
    在 Android 系统中,底层发送的 uevent 消息在上层由 ueventd 进行接收和处理。ueventd 是 Android 系统启动后就运行的一个服务进程,它通过 while死循环 不断检查系统是否接收到新的 uevent 消息,如果有就调用 handle_device_fd() 函数进行处理。我们可以在 system/core/init/ueventd.cpp 中找到 ueventd 的主函数。代码如下:
int ueventd_main(int argc, char **argv) {/** init sets the umask to 077 for forked processes. We need to* create files with exact permissions, without modification by* the umask.*/umask(000);/* Prevent fire-and-forget children from becoming zombies.* If we should need to wait() for some children in the future* (as opposed to none right now), double-forking here instead* of ignoring SIGCHLD may be the better solution.*/signal(SIGCHLD, SIG_IGN);open_devnull_stdio();klog_init();klog_set_level(KLOG_NOTICE_LEVEL);NOTICE("ueventd started!\n");selinux_callback cb;cb.func_log = selinux_klog_callback;selinux_set_callback(SELINUX_CB_LOG, cb);std::string hardware = property_get("ro.hardware");ueventd_parse_config_file("/ueventd.rc");ueventd_parse_config_file(android::base::StringPrintf("/ueventd.%s.rc", hardware.c_str()).c_str());device_init();pollfd ufd;ufd.events = POLLIN;ufd.fd = get_device_fd();while (true) { // 使用死循環(huán),不斷查詢是否有新的消息需要處理ufd.revents = 0;int nr = poll(&ufd, 1, -1);if (nr <= 0) {continue;}if (ufd.revents & POLLIN) {handle_device_fd(); // 如果檢查到有待處理的消息,在這里進行處理}}return 0; }? ? handle_device_fd()函數(shù)主要負(fù)責(zé)解析 uevent 消息,然后將解析出的消息分別傳遞給 handle_device_event() 函數(shù)和 handle_firmware_event() 函數(shù)。后2者會分別檢查 uevent 消息是否是 device 類型或 firmware 類型,并且在滿足檢驗條件的情況下進行相應(yīng)操作。這些函數(shù)都可以在 /system/core/init/devices.cpp 文件中找到,代碼如下:
void handle_device_fd() {char msg[UEVENT_MSG_LEN+2];int n;while ((n = uevent_kernel_multicast_recv(device_fd, msg, UEVENT_MSG_LEN)) > 0) {if(n >= UEVENT_MSG_LEN) /* overflow -- discard */continue;msg[n] = '\0';msg[n+1] = '\0';struct uevent uevent;parse_event(msg, &uevent); // 從消息中解析出uevent事件,保存在uevent結(jié)構(gòu)體變量中if (selinux_status_updated() > 0) {struct selabel_handle *sehandle2;sehandle2 = selinux_android_file_context_handle();if (sehandle2) {selabel_close(sehandle);sehandle = sehandle2;}}handle_device_event(&uevent);handle_firmware_event(&uevent); // 檢查是否需要處理固件uevent事件} }? ? 因為我們是分析固件崩潰后重載的過程,所以來看看 handle_firmware_event() 函數(shù)。這個函數(shù)的內(nèi)容比較簡潔,在檢查傳遞來的 uevent 消息屬于 firmware 子系統(tǒng)和 add 操作后,創(chuàng)建一個子線程并調(diào)用 process_firmware_event() 函數(shù)對 uevent 消息進行最終的處理。代碼如下:
static void handle_firmware_event(struct uevent *uevent) {pid_t pid;if(strcmp(uevent->subsystem, "firmware")) // 固件uevent事件所屬的子系統(tǒng)參數(shù)值必須要是"firmware"return;if(strcmp(uevent->action, "add")) // 固件uevent事件所屬的動作參數(shù)值必須要是"add"return;/* we fork, to avoid making large memory allocations in init proper */pid = fork();if (!pid) {process_firmware_event(uevent); // 開始處理固件事件_exit(EXIT_SUCCESS);} else if (pid < 0) {ERROR("could not fork to process firmware event: %s\n", strerror(errno));} }? ? 不出意外地,在 proces_firmware_event() 函數(shù)中進行了讀寫文件節(jié)點和加載固件的操作。代碼如下:
static void process_firmware_event(struct uevent *uevent) {char *root, *loading, *data;int l, loading_fd, data_fd, fw_fd;size_t i;int booting = is_booting();INFO("firmware: loading '%s' for '%s'\n",uevent->firmware, uevent->path);l = asprintf(&root, SYSFS_PREFIX"%s/", uevent->path);if (l == -1)return;l = asprintf(&loading, "%sloading", root); // 獲取loading文件的路徑if (l == -1)goto root_free_out;l = asprintf(&data, "%sdata", root); // 獲取data文件的路徑if (l == -1)goto loading_free_out;loading_fd = open(loading, O_WRONLY|O_CLOEXEC); // 打開loading文件if(loading_fd < 0)goto data_free_out;data_fd = open(data, O_WRONLY|O_CLOEXEC); // 打開data文件if(data_fd < 0)goto loading_close_out;try_loading_again:for (i = 0; i < ARRAY_SIZE(firmware_dirs); i++) {char *file = NULL;l = asprintf(&file, "%s/%s", firmware_dirs[i], uevent->firmware); // 獲取固件文件路徑if (l == -1)goto data_free_out;fw_fd = open(file, O_RDONLY|O_CLOEXEC); // 打開固件文件free(file);if (fw_fd >= 0) {if(!load_firmware(fw_fd, loading_fd, data_fd)) // 加載固件INFO("firmware: copy success { '%s', '%s' }\n", root, uevent->firmware);elseINFO("firmware: copy failure { '%s', '%s' }\n", root, uevent->firmware);break;}}if (fw_fd < 0) {if (booting) {/* If we're not fully booted, we may be missing* filesystems needed for firmware, wait and retry.*/usleep(100000); // 如果固件加載失敗,并且系統(tǒng)仍處于啟動過程中,那么等待100ms后嘗試重新加載固件booting = is_booting();goto try_loading_again; // 重新加載固件}INFO("firmware: could not open '%s': %s\n", uevent->firmware, strerror(errno));write(loading_fd, "-1", 2);goto data_close_out;}close(fw_fd); data_close_out:close(data_fd); loading_close_out:close(loading_fd); data_free_out:free(data); loading_free_out:free(loading); root_free_out:free(root); }總結(jié)
以上是生活随笔为你收集整理的Android系统中固件崩溃后使用uevent机制重新加载固件 流程分析的全部内容,希望文章能够帮你解决所遇到的问题。
- 上一篇: 挽救市场信誉度,三星正面回应手机爆炸缘由
- 下一篇: 2023.04.22更新大麦网移动端/M