×

Android端线上NativeCrash收集的两种方法(下)

96
kx5156
2016.08.11 18:36* 字数 455

上文中介绍了基于google-breakpad的NativeCrash日志收集方法的全过程,但其有个缺陷就是生成的通用SO和dmp日志比较大,对sdk大小有严格要求的APP不是很方便,且不利于收集线上用户的NativeCrash数据(需要在wifi情况下收集)。本文介绍另外一种基于linux信号处理的轻量级方法。

实现原理

基于linux信号异常处理的NativeCrash收集实现原理

上图给出了基于linux信号异常处理的Native崩溃捕获的实现原理,即当Native层代码发生崩溃时,系统会发送异常signal(主要有SIGILL/SIGSEGV/SIGBUS/SIGABRT/SIGSTKFLT等),系统捕获到异常signal后,回调信号异常处理函数(需事先注册),在异常处理函数中获取C层崩溃堆栈数据,通过JNI回调至JAVA层,进行解析、上传等操作。

注册信号处理函数如下:

/*
 * Installs `act` as the new action for signal `signum`; when `oldact` is
 * non-NULL the previously installed action is stored there.
 */
int sigaction(int signum,const struct sigaction *act,struct sigaction *oldact);

/*
 * Illustrative layout of the action descriptor (the real definition comes
 * from <signal.h>).
 */
struct sigaction {
    void (*sa_handler)(int);                      /* one-argument handler */
    void (*sa_sigaction)(int,siginfo_t *,void *); /* three-argument handler */
    sigset_t sa_mask;                             /* signal set field */
    int sa_flags;                                 /* behaviour flags */
    void (*sa_restorer)(void);
};

此方案的难点在于如何在信号异常处理函数中获取native崩溃堆栈数据。网上提的比较多的方法是利用libcorkscrew.so来获取,但是libcorkscrew.so在Android4.4之后废弃了。当Android版本号大于4.4时,可以采用libbacktrace.so来获取崩溃数据。
如下给出信号异常处理函数的具体实现。

/**
 * Crash-time reporter: captures the native backtrace of the crashing thread
 * and hands it to the Java layer.
 *
 * @param reason      textual description of the crash, forwarded unchanged
 * @param siginfo     signal information received by the signal handler; when
 *                    NULL the libcorkscrew path does nothing
 * @param sigcontext  opaque context pointer received by the signal handler
 *
 * Flow:
 *  1. Obtain a JNIEnv for the current thread, attaching it if necessary.
 *  2. androidversion >= 21: delegate to captureNativeCrashForAndroid5().
 *  3. Otherwise: resolve libcorkscrew via init(), unwind, format one text
 *     line per frame and invoke makeCrashReportMethod on applicationClass.
 */
void _handleNativeCrash(const char *reason, struct siginfo *siginfo,
        void *sigcontext) {
    LOGD("_handleNativeCrash");
    JNIEnv *env = 0;
    int result = javaVM->GetEnv((void **) &env, JNI_VERSION_1_6);
    if (result == JNI_EDETACHED) {
        LOGW("Native crash occured in a non jvm-attached thread");
        result = javaVM->AttachCurrentThread(&env, NULL);
    }

    if (result != JNI_OK) {
        LOGE("Could not attach thread to Java VM for crash reporting.\n Crash was: %s", reason);
        return;
    }

    if (androidversion >= 21) {
        if (captureNativeCrashForAndroid5(sigcontext, reason) == -1) {
            LOGI("--libbacktrace fail");
        }
        return;
    }

    init();
    /* Every entry point used below must have been resolved; a partially
     * resolved library would otherwise mean calling through NULL. */
    if (unwind_backtrace_signal_arch == NULL
            || acquire_my_map_info_list == NULL
            || release_my_map_info_list == NULL
            || get_backtrace_symbols == NULL
            || free_backtrace_symbols == NULL
            || siginfo == NULL) {
        return;
    }

    LOGI("libcorkscrew.so start");
    char lines[MAX_BACKTRACE_LINES_LENGTH] = { 0, };
    size_t used = 0; /* bytes of `lines` already filled, excluding the NUL */

    map_info_t *map_info = acquire_my_map_info_list();
    backtrace_frame_t frames[256] = { 0, };
    backtrace_symbol_t symbols[256] = { 0, };
    const ssize_t unwound = unwind_backtrace_signal_arch(siginfo,
            sigcontext, map_info, frames, 0, 255);
    /* A negative result must never reach the symbol helpers as a count. */
    const size_t size = unwound > 0 ? (size_t) unwound : 0;
    if (size > 0) {
        get_backtrace_symbols(frames, size, symbols);
    }

    /* "%.*s" takes its precision as an int. */
    const int fieldWidth = (int) ((MAX_BACKTRACE_LINE_LENGTH - 80) / 2);

    for (size_t i = 0; i < size && used < sizeof(lines) - 1; ++i) {
        char line[MAX_BACKTRACE_LINE_LENGTH];
        const char *method = symbols[i].demangled_name;
        if (!method) {
            method = symbols[i].symbol_name;
        }
        const char *file = symbols[i].map_name;
        if (!file) {
            file = "<unknown>";
        }

        const unsigned long pc = (unsigned long) symbols[i].relative_pc;
        if (method) {
            const unsigned long pc_offset = (unsigned long) (symbols[i].relative_pc
                    - symbols[i].relative_symbol_addr);
            if (pc_offset) {
                snprintf(line, sizeof(line),
                        "#%02d  pc %08lx  %.*s (%.*s+%lu)", (int) i,
                        pc, fieldWidth, file,
                        fieldWidth, method, pc_offset);
            } else {
                snprintf(line, sizeof(line),
                        "#%02d  pc %08lx  %.*s (%.*s)", (int) i,
                        pc, fieldWidth, file,
                        fieldWidth, method);
            }
        } else {
            snprintf(line, sizeof(line),
                    "#%02d  pc %08lx  %.*s", (int) i, pc,
                    fieldWidth, file);
        }

        /* Append at the current end instead of printing `lines` into itself:
         * snprintf with overlapping source and destination is undefined. */
        const size_t room = sizeof(lines) - used;
        const int written = snprintf(lines + used, room, " \n%s", line);
        if (written < 0) {
            break;
        }
        used = ((size_t) written >= room) ? sizeof(lines) - 1
                                          : used + (size_t) written;
    }

    if (size > 0) {
        free_backtrace_symbols(symbols, size);
    }
    release_my_map_info_list(map_info);
    LOGI("before call makeCrashReportMethod");
    env->CallStaticVoidMethod(applicationClass, makeCrashReportMethod,
            env->NewStringUTF(reason), env->NewStringUTF(lines), (jint) gettid());
    LOGI("after call makeCrashReportMethod");
}

/**
 * Loads libcorkscrew.so and resolves the five entry points used for
 * unwinding into the corresponding function-pointer variables.
 *
 * Resolution is all-or-nothing: if any symbol is missing, every pointer is
 * reset to NULL, so a caller that tests only unwind_backtrace_signal_arch
 * never ends up calling through an unresolved sibling pointer.
 * If the library cannot be opened, the pointers are left untouched.
 */
void init(){
    void * libcorkscrew = dlopen("libcorkscrew.so", RTLD_LAZY | RTLD_LOCAL);
    if (!libcorkscrew) {
        return;
    }

    unwind_backtrace_signal_arch = (t_unwind_backtrace_signal_arch) dlsym(
            libcorkscrew, "unwind_backtrace_signal_arch");
    acquire_my_map_info_list = (t_acquire_my_map_info_list) dlsym(
            libcorkscrew, "acquire_my_map_info_list");
    release_my_map_info_list = (t_release_my_map_info_list) dlsym(
            libcorkscrew, "release_my_map_info_list");
    get_backtrace_symbols = (t_get_backtrace_symbols) dlsym(libcorkscrew,
            "get_backtrace_symbols");
    free_backtrace_symbols = (t_free_backtrace_symbols) dlsym(libcorkscrew,
            "free_backtrace_symbols");

    if (!unwind_backtrace_signal_arch || !acquire_my_map_info_list
            || !release_my_map_info_list || !get_backtrace_symbols
            || !free_backtrace_symbols) {
        /* Incomplete symbol set: disable the whole libcorkscrew path. */
        unwind_backtrace_signal_arch = NULL;
        acquire_my_map_info_list = NULL;
        release_my_map_info_list = NULL;
        get_backtrace_symbols = NULL;
        free_backtrace_symbols = NULL;
    }
    /* The handle is intentionally not closed: the resolved pointers stay in
     * use after this function returns. */
}

/**
 * Unwinds the crashing thread with libbacktrace.so and writes the formatted
 * frames out through dumpFile().
 *
 * @param ctx     signal context received by the handler; passed to Unwind()
 * @param reason  textual description of the crash, forwarded to dumpFile()
 * @return 0 once the backtrace text has been handed to dumpFile(); -1 when
 *         the library, the Backtrace::Create symbol, the Backtrace object or
 *         the unwind itself is unavailable.
 */
int captureNativeCrashForAndroid5(const void* ctx, const char* reason){
    char lines[MAX_BACKTRACE_LINES_LENGTH] = { 0, };
    void* libbacktrace = dlopen( "libbacktrace.so", RTLD_LAZY |RTLD_LOCAL );
    if ( !libbacktrace ){
        return -1;
    }

    /* Mangled name of Backtrace::Create(int, int, BacktraceMap*). */
    Backtrace* (*create)(int,int,void*);
    *(void**)&create = dlsym( libbacktrace, "_ZN9Backtrace6CreateEiiP12BacktraceMap" );
    if ( !create ){
        return -1;
    }

    /* NOTE(review): the thread-id argument is passed as 0, as in the original;
     * confirm against libbacktrace's Backtrace::Create contract. */
    Backtrace* t = create(getpid(), 0, NULL );
    if ( !t ){
        return -1;
    }

    int ret = t->Unwind(0, (ucontext*)ctx);
    if ( !ret ) {
        return -1;
    }

    /* Iterate over the frames actually unwound (the previous bound was a
     * character-length constant unrelated to the frame count). */
    const size_t count = t->NumFrames();
    size_t used = 0; /* bytes of `lines` already filled, excluding the NUL */
    for ( size_t i = 0; i < count && used < sizeof(lines) - 1; i++ ) {
        std::string line = t->FormatFrameData(i);
        if ( line.empty() ){
            break;
        }
        /* Append at the current end instead of printing `lines` into itself:
         * snprintf with overlapping source and destination is undefined. */
        const size_t room = sizeof(lines) - used;
        const int written = snprintf(lines + used, room, " \n%s", line.c_str());
        if ( written < 0 ){
            break;
        }
        used = ((size_t) written >= room) ? sizeof(lines) - 1
                                          : used + (size_t) written;
    }
    dumpFile(reason,lines);
    return 0;
}

Android.mk代码如下

# ndk-build module description for the native crash handler library.
LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)

LOCAL_MODULE    := nativecrash
LOCAL_SRC_FILES := NativeCrashHandler.cpp

# Optimise for size and hide symbols by default.
LOCAL_CPPFLAGS := -Os -fvisibility=hidden
LOCAL_CFLAGS := -Os -fvisibility=hidden
LOCAL_CFLAGS += -Wno-psabi
# Keep unwind tables and debug info in the build.
LOCAL_CFLAGS += -funwind-tables -g

# liblog, libdl (dlopen/dlsym) and libandroid.
LOCAL_LDLIBS += -llog -ldl -landroid

# Without this include the variables above are never handed to the build
# system and no library is produced.
include $(BUILD_SHARED_LIBRARY)

Application.mk代码如下

# Application-wide ndk-build settings.
# Target ABIs to build for.
# NOTE(review): newer NDK releases no longer support armeabi and mips -
# confirm against the NDK version in use.
APP_ABI := armeabi-v7a armeabi mips x86 
# C++ runtime, linked statically.
# NOTE(review): stlport was removed from newer NDK releases - confirm.
APP_STL := stlport_static
# Release-mode optimisation.
APP_OPTIM := release
# Keep format-security diagnostics from being treated as errors.
APP_CPPFLAGS += -Wno-error=format-security

至此两种NativeCrash收集的方法介绍完毕,两种方法各有优缺点,基于google-breakpad的方法体量较大,但获取的信息较全面。基于异常信号处理方法体量小,但信息不全面,只有崩溃线程的堆栈信息,不利于定位多线程崩溃的问题。

日记本
Web note ad 1