Service 出现ANR的原因

Android知识总结

一、ANR出现原因

Android 系统中, ActivityManagerService( 简称 AMS)和WindowManagerService( 简 称 WMS)会检测App的响应时间,如果App在特定时间无法相应屏幕触摸或键盘输入时间,或者特定事件没有处理完毕,就会出现 ANR。

1、以下四个条件都可以造成 ANR 发生:

  • InputDispatching Timeout:5 秒内无法响应屏幕触摸事件或键盘输入事件
  • BroadcastQueue Timeout :在执行前台广播(BroadcastReceiver)的 onReceive()函数时 10 秒没有处理完成,后台为 60 秒。
  • Service Timeout :前台服务 20 秒内,后台服务在 200 秒内没有执行完毕。
  • ContentProvider Timeout :ContentProvider 的 publish 在 10s 内没进行完。

二、Service ANR 源码分析

1、Service 造成的 e Service Timeout

Service Timeout 是位于 ActivityManager 线程中的 AMS.MainHandler 收到 SERVICE_TIMEOUT_MSG 消息时触发。

2、发送延时消息

Service 进程 attach 到 system_server 进程的过程中会调用realStartServiceLocked ,紧接着mAm.mHandler.sendMessageAtTime() 来发送一个延时消息,延时的时常是定义好的,如前台 Service 的 20 秒。ActivityManager 线程中的 AMS.MainHandler 收到 SERVICE_TIMEOUT_MSG 消息时会触发。

  • Service 是在AMS 中交给 ActiveServices 进行管理的。

调用 ActiveServices.realStartServiceLocked 方法

IApplicationThread thread; //ActivityThread 的内部类

private final void realStartServiceLocked(ServiceRecord r, ProcessRecord app, boolean execInFg) throws RemoteException {
    ...
    //发送 delay 消息(SERVICE_TIMEOUT_MSG
    bumpServiceExecutingLocked(r, execInFg, "create");
    
    try {
        if (LOG_SERVICE_START_STOP) {
        ...
        //最终执行服务的 onCreate()方法
        app.thread.scheduleCreateService(r, r.serviceInfo,
                mAm.compatibilityInfoForPackage(r.serviceInfo.applicationInfo),
                app.getReportedProcState());
        r.postNotification();
        created = true;
    } catch (DeadObjectException e) {
        Slog.w(TAG, "Application dead when creating service " + r);
        mAm.appDiedLocked(app, "Died when creating service");
        throw e;
    } finally {
        ..
    }
    ...
}
  • 发送延时消息

ActiveServices.java

private final void bumpServiceExecutingLocked(ServiceRecord r, boolean fg, String why) {
    ...
    long now = SystemClock.uptimeMillis();
    if (r.executeNesting == 0) {
        r.executeFg = fg;
        ServiceState stracker = r.getTracker();
        if (stracker != null) {
            stracker.setExecuting(true, mAm.mProcessStats.getMemFactorLocked(), now);
        }
        if (r.app != null) {
            r.app.executingServices.add(r);
            r.app.execServicesFg |= fg;
            if (timeoutNeeded && r.app.executingServices.size() == 1) {
                scheduleServiceTimeoutLocked(r.app);
            }
        }
    } else if (r.app != null && fg && !r.app.execServicesFg) {
        r.app.execServicesFg = true;
        if (timeoutNeeded) {
            scheduleServiceTimeoutLocked(r.app);
        }
    }
    r.executeFg |= fg;
    r.executeNesting++;
    r.executingStart = now;
}
static final int SERVICE_TIMEOUT = 20*1000; //前台服务 20 秒内
static final int SERVICE_BACKGROUND_TIMEOUT = SERVICE_TIMEOUT * 10; //后台服务在 200 秒
void scheduleServiceTimeoutLocked(ProcessRecord proc) {
    if (proc.executingServices.size() == 0 || proc.thread == null) {
        return;
    } 
    Message msg = mAm.mHandler.obtainMessage(
            ActivityManagerService.SERVICE_TIMEOUT_MSG);
    msg.obj = proc;
    //发送延时消息
    //当超时后仍没有 remove 该SERVICE_TIMEOUT_MSG 消息,则执行 service Timeout 流程
    mAm.mHandler.sendMessageDelayed(msg,
            proc.execServicesFg ? SERVICE_TIMEOUT : SERVICE_BACKGROUND_TIMEOUT);
}

进入 ActivityManagerService#MainHandler处理消息

public class ActivityManagerService extends IActivityManager.Stub
        implements Watchdog.Monitor, BatteryStatsImpl.BatteryCallback {
    final MainHandler mHandler; 
    final ActiveServices mServices;

    final class MainHandler extends Handler {
        public MainHandler(Looper looper) {
            super(looper, null, true);
        }
        
        @Override
        public void handleMessage(Message msg) {
            switch (msg.what) {
            case GC_BACKGROUND_PROCESSES_MSG: {
                synchronized (ActivityManagerService.this) {
                    performAppGcsIfAppropriateLocked();
                }
            } break;
            case SERVICE_TIMEOUT_MSG: { //处理 service 的超时时间 ANR
                mServices.serviceTimeout((ProcessRecord)msg.obj);
            } break;
            ...
        
        }
    }
}

进入ActiveServices#serviceTimeout 进行处理

void serviceTimeout(ProcessRecord proc) {
    String anrMessage = null;
    synchronized(mAm) {
        if (proc.isDebugging()) {
            // The app's being debugged, ignore timeout.
            return;
        }
        if (proc.executingServices.size() == 0 || proc.thread == null) {
            return;
        }
        final long now = SystemClock.uptimeMillis();
        final long maxTime =  now -
                (proc.execServicesFg ? SERVICE_TIMEOUT : SERVICE_BACKGROUND_TIMEOUT);
        ServiceRecord timeout = null;
        long nextTime = 0;
        for (int i=proc.executingServices.size()-1; i>=0; i--) {
            ServiceRecord sr = proc.executingServices.valueAt(i);
            if (sr.executingStart < maxTime) {
                timeout = sr;
                break;
            }
            if (sr.executingStart > nextTime) {
                nextTime = sr.executingStart;
            }
        }
        if (timeout != null && mAm.mProcessList.mLruProcesses.contains(proc)) {
            Slog.w(TAG, "Timeout executing service: " + timeout);
            StringWriter sw = new StringWriter();
            PrintWriter pw = new FastPrintWriter(sw, false, 1024);
            pw.println(timeout);
            timeout.dump(pw, "    ");
            pw.close();
            mLastAnrDump = sw.toString();
            mAm.mHandler.removeCallbacks(mLastAnrDumpClearer);
            mAm.mHandler.postDelayed(mLastAnrDumpClearer, LAST_ANR_LIFETIME_DURATION_MSECS);
            //异常 消息
            anrMessage = "executing service " + timeout.shortInstanceName;
        } else {
            Message msg = mAm.mHandler.obtainMessage(
                    ActivityManagerService.SERVICE_TIMEOUT_MSG);
            msg.obj = proc;
            mAm.mHandler.sendMessageAtTime(msg, proc.execServicesFg
                    ? (nextTime+SERVICE_TIMEOUT) : (nextTime + SERVICE_BACKGROUND_TIMEOUT));
        }
    }

    if (anrMessage != null) {
        //处理 NAR
        mAm.mAnrHelper.appNotResponding(proc, anrMessage);
    }
}

无论是四大组件或者进程等只要发生ANR,最终都会调用 AMS.appNotResponding() 方法。

class AnrHelper {
    //保存 ANR 信息
    private final ArrayList<AnrRecord> mAnrRecords = new ArrayList<>();
    private final AtomicBoolean mRunning = new AtomicBoolean(false);

    void appNotResponding(ProcessRecord anrProcess, String annotation) {
        appNotResponding(anrProcess, null /* activityShortComponentName */, null /* aInfo */,
                null /* parentShortComponentName */, null /* parentProcess */,
                false /* aboveSystem */, annotation);
    }

    void appNotResponding(ProcessRecord anrProcess, String activityShortComponentName,
            ApplicationInfo aInfo, String parentShortComponentName,
            WindowProcessController parentProcess, boolean aboveSystem, String annotation) {
        synchronized (mAnrRecords) {
            mAnrRecords.add(new AnrRecord(anrProcess, activityShortComponentName, aInfo,
                    parentShortComponentName, parentProcess, aboveSystem, annotation));
        }
        startAnrConsumerIfNeeded();
    }
    
    private void startAnrConsumerIfNeeded() {
        if (mRunning.compareAndSet(false, true)) {
            new AnrConsumerThread().start();
        }
    }

    //执行 ANR 的线程
    private class AnrConsumerThread extends Thread {
        AnrConsumerThread() {
            super("AnrConsumer");
        }

        private AnrRecord next() {
            synchronized (mAnrRecords) {
                return mAnrRecords.isEmpty() ? null : mAnrRecords.remove(0);
            }
        }

        @Override
        public void run() {
            AnrRecord r;
            while ((r = next()) != null) {
                final long startTime = SystemClock.uptimeMillis();
                // If there are many ANR at the same time, the latency may be larger. If the latency
                // is too large, the stack trace might not be meaningful.
                final long reportLatency = startTime - r.mTimestamp;
                final boolean onlyDumpSelf = reportLatency > EXPIRED_REPORT_TIME_MS;
                r.appNotResponding(onlyDumpSelf);
                final long endTime = SystemClock.uptimeMillis();
                Slog.d(TAG, "Completed ANR of " + r.mApp.processName + " in "
                        + (endTime - startTime) + "ms, latency " + reportLatency
                        + (onlyDumpSelf ? "ms (expired, only dump ANR app)" : "ms"));
            }

            mRunning.set(false);
            synchronized (mAnrRecords) {
                // The race should be unlikely to happen. Just to make sure we don't miss.
                if (!mAnrRecords.isEmpty()) {
                    startAnrConsumerIfNeeded();
                }
            }
        }
    }

    //存储 ANR 信息类
    private static class AnrRecord {
        final ProcessRecord mApp;
        final String mActivityShortComponentName;
        final String mParentShortComponentName;
        final String mAnnotation;
        final ApplicationInfo mAppInfo;
        final WindowProcessController mParentProcess;
        final boolean mAboveSystem;
        final long mTimestamp = SystemClock.uptimeMillis();

        AnrRecord(ProcessRecord anrProcess, String activityShortComponentName,
                ApplicationInfo aInfo, String parentShortComponentName,
                WindowProcessController parentProcess, boolean aboveSystem, String annotation) {
            mApp = anrProcess;
            mActivityShortComponentName = activityShortComponentName;
            mParentShortComponentName = parentShortComponentName;
            mAnnotation = annotation;
            mAppInfo = aInfo;
            mParentProcess = parentProcess;
            mAboveSystem = aboveSystem;
        }

        void appNotResponding(boolean onlyDumpSelf) {
            mApp.appNotResponding(mActivityShortComponentName, mAppInfo,
                    mParentShortComponentName, mParentProcess, mAboveSystem, mAnnotation,
                    onlyDumpSelf);
        }
    }
}
  • 进入目标进程的主线程创建 Service

进入 ActivityThread#ApplicationThread#scheduleCreateService 的方法。

public final void scheduleCreateService(IBinder token,
        ServiceInfo info, CompatibilityInfo compatInfo, int processState) {
    updateProcessState(processState, false);
    CreateServiceData s = new CreateServiceData();
    s.token = token;
    s.info = info;
    s.compatInfo = compatInfo;
    //发送 CREATE_SERVICE 消息
    sendMessage(H.CREATE_SERVICE, s);
}

调用 ActivityThread#H 类发送消息

class H extends Handler {
    public void handleMessage(Message msg) 
        switch (msg.what) {
            case CREATE_SERVICE:
                if (Trace.isTagEnabled(Trace.TRACE_TAG_ACTIVITY_MANAGER)) {
                    Trace.traceBegin(Trace.TRACE_TAG_ACTIVITY_MANAGER,
                            ("serviceCreate: " + String.valueOf(msg.obj)));
                }
                handleCreateService((CreateServiceData)msg.obj);
                Trace.traceEnd(Trace.TRACE_TAG_ACTIVITY_MANAGER);
                break;
        }
}
private void handleCreateService(CreateServiceData data) {
    unscheduleGcIdler();

    LoadedApk packageInfo = getPackageInfoNoCheck(
            data.info.applicationInfo, data.compatInfo);
    Service service = null;
    try {
        if (localLOGV) Slog.v(TAG, "Creating service " + data.info.name);
        //创建 ContextImpl 对象
        ContextImpl context = ContextImpl.createAppContext(this, packageInfo);
        //创建 Application 对象
        Application app = packageInfo.makeApplication(false, mInstrumentation);
        java.lang.ClassLoader cl = packageInfo.getClassLoader();
        service = packageInfo.getAppFactory()
                .instantiateService(cl, data.info.name, data.intent);
        // Service resources must be initialized with the same loaders as the application
        // context.
        context.getResources().addLoaders(
                app.getResources().getLoaders().toArray(new ResourcesLoader[0]));

        context.setOuterContext(service);
        service.attach(context, this, data.info.name, data.token, app,
                ActivityManager.getService());
        //调用服务 onCreate()方法
        service.onCreate();
        mServices.put(data.token, service);
        try {
            //取消 AMS.MainHandler 的延时消息
            ActivityManager.getService().serviceDoneExecuting(
                    data.token, SERVICE_DONE_EXECUTING_ANON, 0, 0);
        } catch (RemoteException e) {
            throw e.rethrowFromSystemServer();
        }
    } catch (Exception e) {
        if (!mInstrumentation.onException(service, e)) {
            throw new RuntimeException(
                "Unable to create service " + data.info.name
                + ": " + e.toString(), e);
        }
    }
}

这个方法中会创建目标服务对象,以及回调常用的 Service 的 onCreate() 方法,紧接着通过 serviceDoneExecuting() 回到 system_server 执行取消 AMS.MainHandler 的延时消息

3、回到 system_server 进程执行取消 AMS.MainHandler 的延时消息。

进入ActivityManagerService#serviceDoneExecuting 方法

final ActiveServices mServices;

public void serviceDoneExecuting(IBinder token, int type, int startId, int res) {
    synchronized(this) {
        if (!(token instanceof ServiceRecord)) {
            Slog.e(TAG, "serviceDoneExecuting: Invalid service token=" + token);
            throw new IllegalArgumentException("Invalid service token");
        }
        mServices.serviceDoneExecutingLocked((ServiceRecord)token, type, startId, res);
    }
}

调用 ActiveServices#serviceDoneExecutingLocked 方法

void serviceDoneExecutingLocked(ServiceRecord r, int type, int startId, int res) {
    boolean inDestroying = mDestroyingServices.contains(r);
    if (r != null) {
        if (type == ActivityThread.SERVICE_DONE_EXECUTING_START) {
            r.callStart = true;
            switch (res) {
                case Service.START_STICKY_COMPATIBILITY:
                case Service.START_STICKY: {
                    // We are done with the associated start arguments.
                    r.findDeliveredStart(startId, false, true);
                    // Don't stop if killed.
                    r.stopIfKilled = false;
                    break;
                }
                case Service.START_NOT_STICKY: {
                    // We are done with the associated start arguments.
                    r.findDeliveredStart(startId, false, true);
                    if (r.getLastStartId() == startId) {
                        r.stopIfKilled = true;
                    }
                    break;
                }
                case Service.START_REDELIVER_INTENT: {
                    ServiceRecord.StartItem si = r.findDeliveredStart(startId, false, false);
                    if (si != null) {
                        si.deliveryCount = 0;
                        si.doneExecutingCount++;
                        // Don't stop if killed.
                        r.stopIfKilled = true;
                    }
                    break;
                }
                case Service.START_TASK_REMOVED_COMPLETE: {
                    r.findDeliveredStart(startId, true, true);
                    break;
                }
                default:
                    throw new IllegalArgumentException(
                            "Unknown service start result: " + res);
            }
            if (res == Service.START_STICKY_COMPATIBILITY) {
                r.callStart = false;
            }
        } else if (type == ActivityThread.SERVICE_DONE_EXECUTING_STOP) {
            if (!inDestroying) {
                if (r.app != null) {
                    Slog.w(TAG, "Service done with onDestroy, but not inDestroying: "
                            + r + ", app=" + r.app);
                }
            } else if (r.executeNesting != 1) {
                Slog.w(TAG, "Service done with onDestroy, but executeNesting="
                        + r.executeNesting + ": " + r);
                // Fake it to keep from ANR due to orphaned entry.
                r.executeNesting = 1;
            }
        }
        final long origId = Binder.clearCallingIdentity();
        serviceDoneExecutingLocked(r, inDestroying, inDestroying);
        Binder.restoreCallingIdentity(origId);
    } else {
        Slog.w(TAG, "Done executing unknown service from pid "
                + Binder.getCallingPid());
    }
}
private void serviceDoneExecutingLocked(ServiceRecord r, boolean inDestroying,
        boolean finishing) {
    if (DEBUG_SERVICE) Slog.v(TAG_SERVICE, "<<< DONE EXECUTING " + r
            + ": nesting=" + r.executeNesting
            + ", inDestroying=" + inDestroying + ", app=" + r.app);
    else if (DEBUG_SERVICE_EXECUTING) Slog.v(TAG_SERVICE_EXECUTING,
            "<<< DONE EXECUTING " + r.shortInstanceName);
    r.executeNesting--;
    if (r.executeNesting <= 0) {
        if (r.app != null) {
            if (DEBUG_SERVICE) Slog.v(TAG_SERVICE,
                    "Nesting at 0 of " + r.shortInstanceName);
            r.app.execServicesFg = false;
            r.app.executingServices.remove(r);
            if (r.app.executingServices.size() == 0) {
                if (DEBUG_SERVICE || DEBUG_SERVICE_EXECUTING) Slog.v(TAG_SERVICE_EXECUTING,
                        "No more executingServices of " + r.shortInstanceName);
                //移除当前服务所在进程中没有正在执行的 service
                mAm.mHandler.removeMessages(ActivityManagerService.SERVICE_TIMEOUT_MSG, r.app);
            } else if (r.executeFg) {
                // Need to re-evaluate whether the app still needs to be in the foreground.
                for (int i=r.app.executingServices.size()-1; i>=0; i--) {
                    if (r.app.executingServices.valueAt(i).executeFg) {
                        r.app.execServicesFg = true;
                        break;
                    }
                }
            }
            if (inDestroying) {
                if (DEBUG_SERVICE) Slog.v(TAG_SERVICE,
                        "doneExecuting remove destroying " + r);
                mDestroyingServices.remove(r);
                r.bindings.clear();
            }
            mAm.updateOomAdjLocked(r.app, true, OomAdjuster.OOM_ADJ_REASON_UNBIND_SERVICE);
        }
        r.executeFg = false;
        if (r.tracker != null) {
            final int memFactor = mAm.mProcessStats.getMemFactorLocked();
            final long now = SystemClock.uptimeMillis();
            r.tracker.setExecuting(false, memFactor, now);
            r.tracker.setForeground(false, memFactor, now);
            if (finishing) {
                r.tracker.clearCurrentOwner(r, false);
                r.tracker = null;
            }
        }
        if (finishing) {
            if (r.app != null && !r.app.isPersistent()) {
                r.app.stopService(r);
                r.app.updateBoundClientUids();
                if (r.whitelistManager) {
                    updateWhitelistManagerLocked(r.app);
                }
            }
            r.setProcess(null);
        }
    }
}

此方法中 Service 逻辑处理完成则移除之前延时的消息SERVICE_TIMEOUT_MSG 。如果没有执行完毕不调用这个方法,则超时后会发出 SERVICE_TIMEOUT_MSG 来告知ANR 发生。

推荐阅读更多精彩内容