KafkaConsumer Core Fields (this article is updated continuously)

Note:

There is a lot of material to cover; this article will be expanded step by step until it is complete.

KafkaConsumer basic API
consumer.subscribe(Arrays.asList(topic));
consumer.assign(Arrays.asList(topicPartition));
consumer.seek(topicPartition, forcedOffset);
poll-committed
public static void assignDemo(KafkaConsumer<String, String> consumer) {
        TopicPartition tp = new TopicPartition(topic, 0);
        consumer.assign(Arrays.asList(tp));
        long lastConsumedOffset = -1;

        while (isRunning.get()){
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1L));
            if (records.isEmpty()) break;

            List<ConsumerRecord<String, String>> partitionRecords = records.records(tp);
            lastConsumedOffset = partitionRecords.get(partitionRecords.size() - 1).offset();
            consumer.commitSync(); // synchronous commit

            System.out.println("consumed offset is: " + lastConsumedOffset);  // e.g. 76
            System.out.println();

            OffsetAndMetadata offsetAndMetadata = consumer.committed(tp);
            // the committed offset is the offset of the next record to consume, i.e. lastConsumedOffset + 1
            System.out.println("committed offset is: " + offsetAndMetadata.offset());  // 77
            System.out.println();

            long position = consumer.position(tp);
            System.out.println("offset of next record: " + position);  // 77, same as the committed offset

        }
    }
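
The basic API listed above also includes seek, which the demo does not exercise; below is a minimal sketch of forcing the consumption position on a manually assigned partition (the starting offset 100L is arbitrary, for illustration only):

public static void seekDemo(KafkaConsumer<String, String> consumer) {
        TopicPartition tp = new TopicPartition(topic, 0);
        consumer.assign(Arrays.asList(tp)); // with assign() the partition is owned immediately,
        consumer.seek(tp, 100L);            // so seek() can be called right away; the next fetch starts at offset 100
        ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1L));
        if (!records.isEmpty())
            System.out.println("first offset after seek: " + records.records(tp).get(0).offset());
    }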

KafkaConsumer

public class KafkaConsumer<K, V> implements Consumer<K, V> {

// visible for testing
    KafkaConsumer(LogContext logContext,
                  String clientId,
                  ConsumerCoordinator coordinator,
                  Deserializer<K> keyDeserializer,
                  Deserializer<V> valueDeserializer,
                  Fetcher<K, V> fetcher,
                  ConsumerInterceptors<K, V> interceptors,
                  Time time,
                  ConsumerNetworkClient client,
                  Metrics metrics,
                  SubscriptionState subscriptions,
                  ConsumerMetadata metadata,
                  long retryBackoffMs,
                  long requestTimeoutMs,
                  int defaultApiTimeoutMs,
                  List<ConsumerPartitionAssignor> assignors,
                  String groupId) {
        this.log = logContext.logger(getClass());
        this.clientId = clientId;
        this.coordinator = coordinator;
        this.keyDeserializer = keyDeserializer;
        this.valueDeserializer = valueDeserializer;
        this.fetcher = fetcher;
        this.isolationLevel = IsolationLevel.READ_UNCOMMITTED;
        this.interceptors = Objects.requireNonNull(interceptors);
        this.time = time;
        this.client = client;
        this.metrics = metrics;
        this.subscriptions = subscriptions;
        this.metadata = metadata;
        this.retryBackoffMs = retryBackoffMs;
        this.requestTimeoutMs = requestTimeoutMs;
        this.defaultApiTimeoutMs = defaultApiTimeoutMs;
        this.assignors = assignors;
        this.groupId = Optional.ofNullable(groupId);
        this.kafkaConsumerMetrics = new KafkaConsumerMetrics(metrics, "consumer");
    }
  // ...
}

The above is the constructor; the core fields are:

  1. ConsumerNetworkClient
  2. SubscriptionState
  3. ConsumerCoordinator
  4. ConsumerPartitionAssignor
  5. Fetcher

Along with three important topics:

  1. Heartbeat analysis
  2. Rebalance implementation
  3. Offset operations

ConsumerNetworkClient

(Figure: Producer -- Kafka Server -- Consumer flow)

As the Producer -- Kafka Server -- Consumer flow shows, ConsumerNetworkClient is the key to the consumer's I/O, playing the same role as the producer's network client. Its two most important methods are trySend and poll; poll can be broken down into 7 steps:

  1. trySend
  2. send
  3. poll
  4. checkDisconnects
  5. maybeTriggerWakeup
  6. trySend
  7. firePendingCompletedRequests

In trySend, for each Node we iterate over its pending ClientRequests; ready checks whether the consumer has a usable connection to that Node. send puts the request into the InFlightRequests queue to wait for a response and also sets it as the KafkaChannel's send field to wait for actual transmission, after which iterator.remove() drops it from the unsent collection.

public class ConsumerNetworkClient implements Closeable {
    // .....
    // timer: the maximum time this call to poll may block
    public void poll(Timer timer, ConsumerNetworkClient.PollCondition pollCondition, boolean disableWakeup) {
        this.firePendingCompletedRequests();
        this.lock.lock();

        try {
            this.handlePendingDisconnects();
            long pollDelayMs = this.trySend(timer.currentTimeMs());
            if (this.pendingCompletion.isEmpty() && (pollCondition == null || pollCondition.shouldBlock())) {
                long pollTimeout = Math.min(timer.remainingMs(), pollDelayMs);
                if (this.client.inFlightRequestCount() == 0) {
                    pollTimeout = Math.min(pollTimeout, this.retryBackoffMs);
                }

                this.client.poll(pollTimeout, timer.currentTimeMs());
            } else {
                this.client.poll(0L, timer.currentTimeMs());
            }

            timer.update();
            this.checkDisconnects(timer.currentTimeMs());
            if (!disableWakeup) {
                this.maybeTriggerWakeup();
            }

            this.maybeThrowInterruptException();
            // The earlier KafkaChannel.send may already have gone out, or connections to new Nodes may
            // have been established during client.poll, so trySend is called once more here
            this.trySend(timer.currentTimeMs());
            this.failExpiredRequests(timer.currentTimeMs());
            this.unsent.clean();
        } finally {
            this.lock.unlock();
        }

        this.firePendingCompletedRequests();
        this.metadata.maybeThrowAnyException();
    }
    // ...
    long trySend(long now) {
        long pollDelayMs = (long)this.maxPollTimeoutMs;

        // for each node with unsent requests, hand requests to the NetworkClient while the connection is ready
        for (Node node : this.unsent.nodes()) {
            Iterator<ClientRequest> iterator = this.unsent.requestIterator(node);
            if (iterator.hasNext()) {
                pollDelayMs = Math.min(pollDelayMs, this.client.pollDelayMs(node, now));
            }

            while (iterator.hasNext()) {
                ClientRequest request = iterator.next();
                if (!this.client.ready(node, now)) {
                    break;
                }

                this.client.send(request, now);
                iterator.remove();
            }
        }

        return pollDelayMs;
    }
    // ...
}
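
For context, internal callers such as ConsumerCoordinator and Fetcher use ConsumerNetworkClient roughly as follows: send buffers the request in unsent and returns a RequestFuture, and poll then drives trySend plus the underlying NetworkClient until that future completes. A rough sketch (client, node and requestBuilder are stand-ins; requestBuilder would be a concrete AbstractRequest.Builder in real callers):

// sketch only: 'client' is a ConsumerNetworkClient, 'node' a target broker,
// 'requestBuilder' a placeholder for a concrete AbstractRequest.Builder
RequestFuture<ClientResponse> future = client.send(node, requestBuilder); // queued into 'unsent'
client.poll(future); // trySend + NetworkClient.poll in a loop until the future is done
if (future.succeeded()) {
    ClientResponse response = future.value();
    // parse the response body
} else {
    RuntimeException e = future.exception();
    // handle the failure, typically retrying after retryBackoffMs
}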

SubscriptionState

This class tracks the consumer's partition and offset state. The version analyzed here is kafka-clients 2.5.1, which is noticeably simpler than 0.9.x.

public class SubscriptionState {
    private static final String SUBSCRIPTION_EXCEPTION_MESSAGE = "Subscription to topics, partitions and pattern are mutually exclusive";
    private final Logger log;
    private SubscriptionState.SubscriptionType subscriptionType;
    private Pattern subscribedPattern;
    private Set<String> subscription;
    private Set<String> groupSubscription;
    private final PartitionStates<SubscriptionState.TopicPartitionState> assignment;
    private final OffsetResetStrategy defaultResetStrategy;
    private ConsumerRebalanceListener rebalanceListener;
    private int assignmentId = 0;

    // the core subscribe overloads
    public synchronized boolean subscribe(Set<String> topics, ConsumerRebalanceListener listener) {
        this.registerRebalanceListener(listener);
        this.setSubscriptionType(SubscriptionState.SubscriptionType.AUTO_TOPICS);
        return this.changeSubscription(topics);
    }

    public synchronized void subscribe(Pattern pattern, ConsumerRebalanceListener listener) {
        this.registerRebalanceListener(listener);
        this.setSubscriptionType(SubscriptionState.SubscriptionType.AUTO_PATTERN);
        this.subscribedPattern = pattern;
    }

    public synchronized boolean subscribeFromPattern(Set<String> topics) {
        if (this.subscriptionType != SubscriptionState.SubscriptionType.AUTO_PATTERN) {
            throw new IllegalArgumentException("Attempt to subscribe from pattern while subscription type set to " + this.subscriptionType);
        } else {
            return this.changeSubscription(topics);
        }
    }
    // core: changeSubscription
    private boolean changeSubscription(Set<String> topicsToSubscribe) {
        if (this.subscription.equals(topicsToSubscribe)) {
            return false;
        } else {
            this.subscription = topicsToSubscribe;
            return true;
        }
    }

    synchronized boolean groupSubscribe(Collection<String> topics) {
        if (!this.hasAutoAssignedPartitions()) {
            throw new IllegalStateException("Subscription to topics, partitions and pattern are mutually exclusive");
        } else {
            this.groupSubscription = new HashSet(topics);
            return !this.subscription.containsAll(this.groupSubscription);
        }
    }

    // ...
}

Within SubscriptionState:

private static enum SubscriptionType {
    NONE,
    AUTO_TOPICS,
    AUTO_PATTERN,
    USER_ASSIGNED;

    private SubscriptionType() {
    }
}

private static class TopicPartitionState {
    private SubscriptionState.FetchState fetchState;
    private SubscriptionState.FetchPosition position;
    private Long highWatermark;
    private Long logStartOffset;
    private Long lastStableOffset;
    private boolean paused = false;
    private OffsetResetStrategy resetStrategy;
    private Long nextRetryTimeMs;
    private Integer preferredReadReplica;
    private Long preferredReadReplicaExpireTimeMs;

    // ...
}

The highWatermark and position fields in TopicPartitionState correspond to the "ISR/HW/LEO" concepts (the following is from Stack Overflow question 39203215):

The high watermark indicates the offset of messages that are fully replicated, while the end-of-log offset might be larger if there are newly appended records to the leader partition which are not replicated yet.
Consumers can only consume messages up to the high watermark.

(Figure: HW vs. LEO)
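
Since endOffsets returns the high watermark under the default read_uncommitted isolation level, the consumer-visible lag can be estimated from the public API; a minimal sketch, assuming tp is already assigned and has a valid position:

Map<TopicPartition, Long> endOffsets = consumer.endOffsets(Collections.singleton(tp));
long hw = endOffsets.get(tp);          // high watermark (read_uncommitted)
long position = consumer.position(tp); // offset of the next record to fetch
System.out.println("visible lag of " + tp + " = " + (hw - position));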

Also note:

  • SubscriptionType => subscribe(topic): automatic: following the configured strategy, the consumer subscribes to topics and has partitions assigned to it;
  • TopicPartitionState => assign(somePartition): manual: partitions are assigned to the consumer explicitly.
    Therefore subscribe(topic) and assign(somePartition) are mutually exclusive (see the sketch below).
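
A minimal sketch of that mutual exclusivity: mixing the two modes on the same KafkaConsumer instance fails with the SUBSCRIPTION_EXCEPTION_MESSAGE shown earlier:

consumer.subscribe(Arrays.asList(topic));                         // AUTO_TOPICS
try {
    consumer.assign(Arrays.asList(new TopicPartition(topic, 0))); // USER_ASSIGNED
} catch (IllegalStateException e) {
    // "Subscription to topics, partitions and pattern are mutually exclusive"
    System.out.println(e.getMessage());
}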

ConsumerCoordinator

  • client side: the partition assignment strategy
  • server side: consumer group management

public final class ConsumerCoordinator extends AbstractCoordinator {
    private final GroupRebalanceConfig rebalanceConfig;
    private final Logger log;
    private final List<ConsumerPartitionAssignor> assignors;
    private final ConsumerMetadata metadata;
    private final ConsumerCoordinator.ConsumerCoordinatorMetrics sensors;
    private final SubscriptionState subscriptions;
    private final OffsetCommitCallback defaultOffsetCommitCallback;
    private final boolean autoCommitEnabled;
    private final int autoCommitIntervalMs;
    private final ConsumerInterceptors<?, ?> interceptors;
    private final AtomicInteger pendingAsyncCommits;
    private final ConcurrentLinkedQueue<ConsumerCoordinator.OffsetCommitCompletion> completedOffsetCommits;
    private boolean isLeader = false;
    private Set<String> joinedSubscription;
    private ConsumerCoordinator.MetadataSnapshot metadataSnapshot;
    private ConsumerCoordinator.MetadataSnapshot assignmentSnapshot;
    private Timer nextAutoCommitTimer;
    private AtomicBoolean asyncCommitFenced;
    private ConsumerGroupMetadata groupMetadata;
    private ConsumerCoordinator.PendingCommittedOffsetRequest pendingCommittedOffsetRequest = null;
    private final RebalanceProtocol protocol;
    // ...
}
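
Several of these fields (defaultOffsetCommitCallback, pendingAsyncCommits, completedOffsetCommits, asyncCommitFenced) exist to support asynchronous offset commits. A minimal sketch of the user-facing side of that path (the callback body is illustrative):

consumer.commitAsync(new OffsetCommitCallback() {
    @Override
    public void onComplete(Map<TopicPartition, OffsetAndMetadata> offsets, Exception exception) {
        // ConsumerCoordinator queues completions into completedOffsetCommits and runs
        // this callback on the user's thread during a later poll()/commit call
        if (exception != null)
            System.err.println("async commit failed for " + offsets + ": " + exception);
    }
});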

The assignors are covered in the next section. The other important fields (rebalanceConfig, completedOffsetCommits, metadataSnapshot, assignmentSnapshot, pendingAsyncCommits, etc.) are helpers for initializing consumer-partition state or for the rebalance, which since Kafka 0.9.x consists of two steps:

  1. Join Group
  2. Synchronizing Group State

Join Group:

  • Consumer --(JoinGroupRequest)--> GroupCoordinator;
  • GroupCoordinator --(JoinGroupResponse, which designates the group leader)--> Consumer

Synchronizing Group State:

  • Consumer --(SyncGroupRequest; only the group leader's request carries the partition assignment)--> GroupCoordinator;
  • GroupCoordinator --(SyncGroupResponse)--> Consumer: each consumer parses the response to learn its own assignment
(Figure: consumer-state)
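
The join/sync cycle is also what triggers the ConsumerRebalanceListener passed to subscribe; a minimal sketch (topic name and handling logic are illustrative):

consumer.subscribe(Arrays.asList(topic), new ConsumerRebalanceListener() {
    @Override
    public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
        // runs before the rebalance completes: a common place to commit offsets
        // for the partitions that are about to be given away
        consumer.commitSync();
    }

    @Override
    public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
        // runs after the SyncGroupResponse has been processed and the new
        // assignment is known; seek / reset per-partition state here
        System.out.println("assigned: " + partitions);
    }
});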

ConsumerPartitionAssignor

(Figure: ConsumerPartitionAssignor inheritance hierarchy)

public interface ConsumerPartitionAssignor {

    default ByteBuffer subscriptionUserData(Set<String> topics) { return null;  }

    ConsumerPartitionAssignor.GroupAssignment assign(Cluster var1, ConsumerPartitionAssignor.GroupSubscription var2);

    default void onAssignment(ConsumerPartitionAssignor.Assignment assignment, ConsumerGroupMetadata metadata) {}

    public static final class Assignment {
        private List<TopicPartition> partitions;
        private ByteBuffer userData;
        // ...
    }

    public static final class Subscription {
        private final List<String> topics;
        private final ByteBuffer userData;
        private final List<TopicPartition> ownedPartitions;
        private Optional<String> groupInstanceId;
    
        // ...
    }
}
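
Which assignor implementations a consumer actually uses is controlled by partition.assignment.strategy; a minimal configuration sketch (bootstrap servers, group id and deserializers are assumed boilerplate):

Properties props = new Properties();
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
props.put(ConsumerConfig.GROUP_ID_CONFIG, "demo-group");
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
// built-in assignors: RangeAssignor (the default), RoundRobinAssignor, StickyAssignor,
// CooperativeStickyAssignor; custom ConsumerPartitionAssignor classes can be listed too
props.put(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG, RoundRobinAssignor.class.getName());
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);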

To spare implementers of the ConsumerPartitionAssignor interface some work, AbstractPartitionAssignor provides the assign(Cluster, GroupSubscription) implementation itself; it strips the userData from each Subscription and then performs the partition assignment:

public abstract class AbstractPartitionAssignor implements ConsumerPartitionAssignor {

    /**
     * Perform the group assignment given the partition counts and member subscriptions
     * @param partitionsPerTopic The number of partitions for each subscribed topic. Topics not in metadata will be excluded
     *                           from this map.
     * @param subscriptions Map from the member id to their respective topic subscription
     * @return Map from each member to the list of partitions assigned to them.
     */
    public abstract Map<String, List<TopicPartition>> assign(Map<String, Integer> partitionsPerTopic,
                                                             Map<String, Subscription> subscriptions);

    @Override
    public GroupAssignment assign(Cluster metadata, GroupSubscription groupSubscription) {
        Map<String, Subscription> subscriptions = groupSubscription.groupSubscription();
        Set<String> allSubscribedTopics = new HashSet<>();
        for (Map.Entry<String, Subscription> subscriptionEntry : subscriptions.entrySet())
            allSubscribedTopics.addAll(subscriptionEntry.getValue().topics()); // only the topics; userData is not used

        Map<String, Integer> partitionsPerTopic = new HashMap<>();
        for (String topic : allSubscribedTopics) {
            Integer numPartitions = metadata.partitionCountForTopic(topic);
            if (numPartitions != null && numPartitions > 0)
                partitionsPerTopic.put(topic, numPartitions);
            else
                log.debug("Skipping assignment for topic {} since no metadata is available", topic);
        }

        // the concrete assignment logic is delegated to the assign overload below, an abstract method implemented by subclasses
        Map<String, List<TopicPartition>> rawAssignments = assign(partitionsPerTopic, subscriptions);

        // this class maintains no user data, so just wrap the results
        Map<String, Assignment> assignments = new HashMap<>();
        for (Map.Entry<String, List<TopicPartition>> assignmentEntry : rawAssignments.entrySet())
            assignments.put(assignmentEntry.getKey(), new Assignment(assignmentEntry.getValue()));
        return new GroupAssignment(assignments);
    }

    // ...
}

Note this part:

// the concrete assignment logic is delegated to the assign overload, an abstract method implemented by subclasses
Map<String, List<TopicPartition>> rawAssignments = assign(partitionsPerTopic, subscriptions);
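
As a rough sketch of what such a subclass looks like (the class name and the naive round-robin logic below are made up for illustration and are not one of the built-in assignors):

public class SimpleRoundRobinAssignor extends AbstractPartitionAssignor {

    @Override
    public String name() {
        return "simple-roundrobin"; // advertised in the JoinGroup metadata / partition.assignment.strategy
    }

    @Override
    public Map<String, List<TopicPartition>> assign(Map<String, Integer> partitionsPerTopic,
                                                    Map<String, Subscription> subscriptions) {
        List<String> members = new ArrayList<>(subscriptions.keySet());
        Collections.sort(members); // deterministic order on the group leader

        Map<String, List<TopicPartition>> assignment = new HashMap<>();
        for (String member : members)
            assignment.put(member, new ArrayList<>());

        int i = 0;
        for (Map.Entry<String, Integer> e : partitionsPerTopic.entrySet()) {
            for (int p = 0; p < e.getValue(); p++) {
                // hand out partitions one by one; a real assignor would also check that the
                // chosen member actually subscribes to this topic
                assignment.get(members.get(i++ % members.size())).add(new TopicPartition(e.getKey(), p));
            }
        }
        return assignment;
    }
}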

TODO:

Fetcher, Heartbeat analysis, Rebalance implementation, offset operations

No reposting without permission; to repost, please contact the author via Jianshu private message or in the comments.