zookeeper源码分析之四服务端(单机)处理请求流程
上文:
zookeeper源碼分析之一服務端啟動過程
中,我們介紹了zookeeper服務器的啟動過程,其中單機使用ZooKeeperServer啟動,集群使用QuorumPeer啟動,那么這次我們分析一下各自的消息處理過程:
1. 前文可以看到,在單機情況下,NettyServerCnxnFactory中啟動ZooKeeperServer來處理消息:
/**
 * Brings the server into the running state.
 *
 * <p>Creates the session tracker when {@code sessionTracker} is still null,
 * starts it, calls {@code setupRequestProcessors()} and {@code registerJMX()},
 * then sets {@code state} to {@code State.RUNNING} and calls
 * {@code notifyAll()} on this object's monitor.
 */
public synchronized void startup() {if (sessionTracker == null) {createSessionTracker();}startSessionTracker(); setupRequestProcessors();registerJMX();state = State.RUNNING;notifyAll();}消息處理器的調用如下:
/**
 * Builds the request-processor chain, wiring it from the last stage to the
 * first: a {@code FinalRequestProcessor} is created, wrapped by a
 * {@code SyncRequestProcessor}, which in turn is wrapped by a
 * {@code PrepRequestProcessor} stored in {@code firstProcessor}.
 *
 * <p>{@code start()} is called on the sync and prep processors only; the
 * final processor is not started here.
 */
protected void setupRequestProcessors() {RequestProcessor finalProcessor = new FinalRequestProcessor(this);RequestProcessor syncProcessor = new SyncRequestProcessor(this,finalProcessor);((SyncRequestProcessor)syncProcessor).start();firstProcessor = new PrepRequestProcessor(this, syncProcessor);((PrepRequestProcessor)firstProcessor).start();}我們看到啟動兩個消息處理器來處理請求:第一個同步消息處理器預消息服務器,最后一個同步請求處理器和異步請求處理器。
1.1 第一個消息處理器:預處理器PrepRequestProcessor
/**
 * Worker loop of the prep processor.
 *
 * <p>Takes requests off {@code submittedRequests} one at a time and passes
 * each to {@code pRequest} until the {@code Request.requestOfDeath} sentinel
 * is dequeued. Because the whole loop sits inside the {@code try}, any
 * exception ends the loop and is forwarded to {@code handleException}.
 */
@Override
public void run() {
    try {
        while (true) {
            Request request = submittedRequests.take();
            // Pings are traced under their own mask.
            long traceMask = (request.type == OpCode.ping)
                    ? ZooTrace.CLIENT_PING_TRACE_MASK
                    : ZooTrace.CLIENT_REQUEST_TRACE_MASK;
            if (LOG.isTraceEnabled()) {
                ZooTrace.logRequest(LOG, traceMask, 'P', request, "");
            }
            // The shutdown sentinel is compared by reference, not equals().
            if (Request.requestOfDeath == request) {
                break;
            }
            pRequest(request);
        }
    } catch (RequestProcessorException e) {
        Throwable cause = e.getCause();
        if (cause instanceof XidRolloverException) {
            LOG.info(cause.getMessage());
        }
        handleException(this.getName(), e);
    } catch (Exception e) {
        handleException(this.getName(), e);
    }
    LOG.info("PrepRequestProcessor exited loop!");
}
可以看到,while(true)是一個一直循環處理的過程,其中pRequest(request)調用(原文中標為紅色的部分)為處理的主體。
/**
 * Prepares a single request and hands it to the next processor.
 *
 * <p>The request's header and txn are cleared first, then the method
 * dispatches on {@code request.type}:
 * <ul>
 *   <li>create/delete/setData/reconfig/setACL/check: builds the matching
 *       request record and calls {@code pRequest2Txn} with a zxid taken from
 *       {@code zks.getNextZxid()};</li>
 *   <li>multi: prepares every op under one shared zxid; after the first
 *       {@code KeeperException} the pending change records are rolled back
 *       and the remaining ops are recorded as {@code ErrorTxn}s;</li>
 *   <li>createSession/closeSession: calls {@code pRequest2Txn} only when the
 *       request is not a local session;</li>
 *   <li>read-style ops (sync, exists, getData, ...): only
 *       {@code zks.sessionTracker.checkSession} is called;</li>
 *   <li>anything else: a warning is logged.</li>
 * </ul>
 * A {@code KeeperException} or any other exception raised while preparing is
 * caught and logged; when the request already has a header, its type is set
 * to {@code OpCode.error} and an {@code ErrorTxn} is attached. Finally
 * {@code request.zxid} is set from {@code zks.getZxid()} and the request is
 * passed to {@code nextProcessor.processRequest}.
 *
 * <p>This method will be called inside the ProcessRequestThread, which is a
 * singleton, so there will be a single thread calling this code.
 *
 * @param request the request to prepare
 * @throws RequestProcessorException declared by the signature; the visible
 *         body catches exceptions from the preparation step itself
 */
protected void pRequest(Request request) throws RequestProcessorException {// LOG.info("Prep>>> cxid = " + request.cxid + " type = " +// request.type + " id = 0x" + Long.toHexString(request.sessionId));request.setHdr(null);request.setTxn(null);try {switch (request.type) {case OpCode.createContainer:case OpCode.create:case OpCode.create2: CreateRequest create2Request = new CreateRequest();pRequest2Txn(request.type, zks.getNextZxid(), request, create2Request, true);break;case OpCode.deleteContainer:case OpCode.delete:DeleteRequest deleteRequest = new DeleteRequest();pRequest2Txn(request.type, zks.getNextZxid(), request, deleteRequest, true);break;case OpCode.setData:SetDataRequest setDataRequest = new SetDataRequest(); pRequest2Txn(request.type, zks.getNextZxid(), request, setDataRequest, true);break;case OpCode.reconfig:ReconfigRequest reconfigRequest = new ReconfigRequest();ByteBufferInputStream.byteBuffer2Record(request.request, reconfigRequest);pRequest2Txn(request.type, zks.getNextZxid(), request, reconfigRequest, true);break;case OpCode.setACL:SetACLRequest setAclRequest = new SetACLRequest(); pRequest2Txn(request.type, zks.getNextZxid(), request, setAclRequest, true);break;case OpCode.check:CheckVersionRequest checkRequest = new CheckVersionRequest(); pRequest2Txn(request.type, zks.getNextZxid(), request, checkRequest, true);break;case OpCode.multi:MultiTransactionRecord multiRequest = new MultiTransactionRecord();try {ByteBufferInputStream.byteBuffer2Record(request.request, multiRequest);} catch(IOException e) {request.setHdr(new TxnHeader(request.sessionId, request.cxid, zks.getNextZxid(),Time.currentWallTime(), OpCode.multi));throw e;}List<Txn> txns = new ArrayList<Txn>();//Each op in a multi-op must have the same zxid!long zxid = zks.getNextZxid();KeeperException ke = null;//Store off current pending 
change records in case we need to rollbackMap<String, ChangeRecord> pendingChanges = getPendingChanges(multiRequest);for(Op op: multiRequest) {Record subrequest = op.toRequestRecord();int type;Record txn;/* If we've already failed one of the ops, don't bother* trying the rest as we know it's going to fail and it* would be confusing in the logfiles.*/if (ke != null) {type = OpCode.error;txn = new ErrorTxn(Code.RUNTIMEINCONSISTENCY.intValue());}/* Prep the request and convert to a Txn */else {try {pRequest2Txn(op.getType(), zxid, request, subrequest, false);type = request.getHdr().getType();txn = request.getTxn();} catch (KeeperException e) {ke = e;type = OpCode.error;txn = new ErrorTxn(e.code().intValue());LOG.info("Got user-level KeeperException when processing "+ request.toString() + " aborting remaining multi ops."+ " Error Path:" + e.getPath()+ " Error:" + e.getMessage());request.setException(e);/* Rollback change records from failed multi-op */rollbackPendingChanges(zxid, pendingChanges);}}//FIXME: I don't want to have to serialize it here and then// immediately deserialize in next processor. 
But I'm// not sure how else to get the txn stored into our list.ByteArrayOutputStream baos = new ByteArrayOutputStream();BinaryOutputArchive boa = BinaryOutputArchive.getArchive(baos);txn.serialize(boa, "request") ;ByteBuffer bb = ByteBuffer.wrap(baos.toByteArray());txns.add(new Txn(type, bb.array()));}request.setHdr(new TxnHeader(request.sessionId, request.cxid, zxid,Time.currentWallTime(), request.type));request.setTxn(new MultiTxn(txns));break;//create/close session don't require request recordcase OpCode.createSession:case OpCode.closeSession:if (!request.isLocalSession()) {pRequest2Txn(request.type, zks.getNextZxid(), request,null, true);}break;//All the rest don't need to create a Txn - just verify sessioncase OpCode.sync:case OpCode.exists:case OpCode.getData:case OpCode.getACL:case OpCode.getChildren:case OpCode.getChildren2:case OpCode.ping:case OpCode.setWatches:case OpCode.checkWatches:case OpCode.removeWatches:zks.sessionTracker.checkSession(request.sessionId,request.getOwner());break;default:LOG.warn("unknown type " + request.type);break;}} catch (KeeperException e) {if (request.getHdr() != null) {request.getHdr().setType(OpCode.error);request.setTxn(new ErrorTxn(e.code().intValue()));}LOG.info("Got user-level KeeperException when processing "+ request.toString()+ " Error Path:" + e.getPath()+ " Error:" + e.getMessage());request.setException(e);} catch (Exception e) {// log at error level as we are returning a marshalling// error to the userLOG.error("Failed to process " + request, e);StringBuilder sb = new StringBuilder();ByteBuffer bb = request.request;if(bb != null){bb.rewind();while (bb.hasRemaining()) {sb.append(Integer.toHexString(bb.get() & 0xff));}} else {sb.append("request buffer is null");}LOG.error("Dumping request buffer: 0x" + sb.toString());if (request.getHdr() != null) {request.getHdr().setType(OpCode.error);request.setTxn(new ErrorTxn(Code.MARSHALLINGERROR.intValue()));}}request.zxid = 
zks.getZxid();nextProcessor.processRequest(request);}排除異常的邏輯,該方法是處理不同類型的request,根據type選擇一個處理分支,ProcessRequestThread內部調用該方法,它是單例的,因此只有一個單線程調用此代碼。以create請求為例(紅色部分),了解工作機制:
CreateRequest createRequest = (CreateRequest)record;if (deserialize) {ByteBufferInputStream.byteBuffer2Record(request.request, createRequest);}CreateMode createMode = CreateMode.fromFlag(createRequest.getFlags());validateCreateRequest(createMode, request);String path = createRequest.getPath();String parentPath = validatePathForCreate(path, request.sessionId);List<ACL> listACL = fixupACL(path, request.authInfo, createRequest.getAcl());ChangeRecord parentRecord = getRecordForPath(parentPath);checkACL(zks, parentRecord.acl, ZooDefs.Perms.CREATE, request.authInfo);int parentCVersion = parentRecord.stat.getCversion();if (createMode.isSequential()) {path = path + String.format(Locale.ENGLISH, "%010d", parentCVersion);}validatePath(path, request.sessionId);try {if (getRecordForPath(path) != null) {throw new KeeperException.NodeExistsException(path);}} catch (KeeperException.NoNodeException e) {// ignore this one }boolean ephemeralParent = (parentRecord.stat.getEphemeralOwner() != 0) &&(parentRecord.stat.getEphemeralOwner() != DataTree.CONTAINER_EPHEMERAL_OWNER);if (ephemeralParent) {throw new KeeperException.NoChildrenForEphemeralsException(path);}int newCversion = parentRecord.stat.getCversion()+1;if (type == OpCode.createContainer) {request.setTxn(new CreateContainerTxn(path, createRequest.getData(), listACL, newCversion));} else {request.setTxn(new CreateTxn(path, createRequest.getData(), listACL, createMode.isEphemeral(),newCversion));}StatPersisted s = new StatPersisted();if (createMode.isEphemeral()) {s.setEphemeralOwner(request.sessionId);}parentRecord = parentRecord.duplicate(request.getHdr().getZxid());parentRecord.childCount++;parentRecord.stat.setCversion(newCversion); addChangeRecord(parentRecord);addChangeRecord(new ChangeRecord(request.getHdr().getZxid(), path, s, 0, listACL));break;調用方法,處理變化:
/**
 * Registers a pending change record.
 *
 * <p>While holding the {@code zks.outstandingChanges} monitor, appends
 * {@code c} to {@code zks.outstandingChanges} and stores it in
 * {@code zks.outstandingChangesForPath} under {@code c.path}, replacing any
 * record previously mapped to that path.
 *
 * @param c the change record to register
 */
private void addChangeRecord(ChangeRecord c) {synchronized (zks.outstandingChanges) {zks.outstandingChanges.add(c);zks.outstandingChangesForPath.put(c.path, c);}}繼續向下
/**
 * Registers a pending change record.
 *
 * <p>While holding the {@code zks.outstandingChanges} monitor, appends
 * {@code c} to {@code zks.outstandingChanges} and stores it in
 * {@code zks.outstandingChangesForPath} under {@code c.path}, replacing any
 * record previously mapped to that path.
 *
 * @param c the change record to register
 */
private void addChangeRecord(ChangeRecord c) {synchronized (zks.outstandingChanges) {zks.outstandingChanges.add(c);zks.outstandingChangesForPath.put(c.path, c);}}其中:outstandingChanges 是一組ChangeRecord,outstandingChangesForPath是map的ChangeRecord,如下定義:
// Pending change records, appended by addChangeRecord() and removed from the
// head by FinalRequestProcessor.processRequest(). The list object also serves
// as the lock guarding both collections.
final List<ChangeRecord> outstandingChanges = new ArrayList<ChangeRecord>();
// Index of pending change records by path (addChangeRecord() stores each
// record under c.path). This data structure must be accessed under the
// outstandingChanges lock.
final HashMap<String, ChangeRecord> outstandingChangesForPath =
new HashMap<String, ChangeRecord>();
ChangeRecord是一個數據結構,方便PrepRequestProcessor和FinalRequestProcessor共享信息。
/**
 * Creates a change record by storing each argument in the field of the same
 * name.
 *
 * @param zxid       stored in {@code this.zxid}
 * @param path       stored in {@code this.path}
 * @param stat       stored in {@code this.stat}
 * @param childCount stored in {@code this.childCount}
 * @param acl        stored in {@code this.acl}
 */
ChangeRecord(long zxid, String path, StatPersisted stat, int childCount,List<ACL> acl) {this.zxid = zxid;this.path = path;this.stat = stat;this.childCount = childCount;this.acl = acl;}?
1.2 先看一下同步請求處理器FinalRequestProcessor,這個請求處理器實際執行與請求相關聯的所有事務,并為查詢請求提供服務。它總是處于請求處理鏈的最后(不會有下一個消息處理器),故此得名。它是如何處理請求的呢?
/**
 * Applies a request to the server state and sends the reply to the client.
 *
 * <p>Steps visible in the body:
 * <ol>
 *   <li>Under the {@code zks.outstandingChanges} lock, calls
 *       {@code zks.processTxn(request)}. When the request has a header, every
 *       leading entry of {@code outstandingChanges} whose zxid is not greater
 *       than the header's zxid is removed, together with its
 *       {@code outstandingChangesForPath} entry if that entry still points at
 *       the same record. Quorum requests are also added to the database's
 *       committed proposals.</li>
 *   <li>For a closeSession whose connection was closed by the client, tries
 *       to close the session on either connection factory and returns when
 *       one succeeds. Returns as well when {@code request.cnxn} is null.</li>
 *   <li>Builds the response record in a switch on {@code request.type}. Ping
 *       and createSession reply (or finish session init) and return
 *       directly.</li>
 *   <li>A {@code SessionMovedException} closes the connection and returns; a
 *       {@code KeeperException} supplies the reply's error code; any other
 *       exception is logged with a hex dump of the request buffer and mapped
 *       to {@code Code.MARSHALLINGERROR}.</li>
 *   <li>Sends a {@code ReplyHeader} carrying the request's cxid, the data
 *       tree's last processed zxid and the error code, plus the response
 *       record; for closeSession it then also calls
 *       {@code cnxn.sendCloseSession()}.</li>
 * </ol>
 *
 * @param request the request to apply and answer
 */
public void processRequest(Request request) {if (LOG.isDebugEnabled()) {LOG.debug("Processing request:: " + request);}// request.addRQRec(">final");long traceMask = ZooTrace.CLIENT_REQUEST_TRACE_MASK;if (request.type == OpCode.ping) {traceMask = ZooTrace.SERVER_PING_TRACE_MASK;}if (LOG.isTraceEnabled()) {ZooTrace.logRequest(LOG, traceMask, 'E', request, "");}ProcessTxnResult rc = null;synchronized (zks.outstandingChanges) {// Need to process local session requestsrc = zks.processTxn(request);// request.hdr is set for write requests, which are the only ones// that add to outstandingChanges.if (request.getHdr() != null) {TxnHeader hdr = request.getHdr();Record txn = request.getTxn();long zxid = hdr.getZxid();while (!zks.outstandingChanges.isEmpty()&& zks.outstandingChanges.get(0).zxid <= zxid) {ChangeRecord cr = zks.outstandingChanges.remove(0);if (cr.zxid < zxid) {LOG.warn("Zxid outstanding " + cr.zxid+ " is less than current " + zxid);}if (zks.outstandingChangesForPath.get(cr.path) == cr) {zks.outstandingChangesForPath.remove(cr.path);}}}// do not add non quorum packets to the queue. if (request.isQuorum()) {zks.getZKDatabase().addCommittedProposal(request);}}// ZOOKEEPER-558:// In some cases the server does not close the connection (e.g., closeconn buffer// was not being queued — ZOOKEEPER-558) properly. This happens, for example,// when the client closes the connection. 
The server should still close the session, though.// Calling closeSession() after losing the cnxn, results in the client close session response being dropped.if (request.type == OpCode.closeSession && connClosedByClient(request)) {// We need to check if we can close the session id.// Sometimes the corresponding ServerCnxnFactory could be null because// we are just playing diffs from the leader.if (closeSession(zks.serverCnxnFactory, request.sessionId) ||closeSession(zks.secureServerCnxnFactory, request.sessionId)) {return;}}if (request.cnxn == null) {return;}ServerCnxn cnxn = request.cnxn;String lastOp = "NA";zks.decInProcess();Code err = Code.OK;Record rsp = null;try {if (request.getHdr() != null && request.getHdr().getType() == OpCode.error) {/** When local session upgrading is disabled, leader will* reject the ephemeral node creation due to session expire.* However, if this is the follower that issue the request,* it will have the correct error code, so we should use that* and report to user*/if (request.getException() != null) {throw request.getException();} else {throw KeeperException.create(KeeperException.Code.get(((ErrorTxn) request.getTxn()).getErr()));}}KeeperException ke = request.getException();if (ke != null && request.type != OpCode.multi) {throw ke;}if (LOG.isDebugEnabled()) {LOG.debug("{}",request);}switch (request.type) {case OpCode.ping: {zks.serverStats().updateLatency(request.createTime);lastOp = "PING";cnxn.updateStatsForResponse(request.cxid, request.zxid, lastOp,request.createTime, Time.currentElapsedTime());cnxn.sendResponse(new ReplyHeader(-2,zks.getZKDatabase().getDataTreeLastProcessedZxid(), 0), null, "response");return;}case OpCode.createSession: {zks.serverStats().updateLatency(request.createTime);lastOp = "SESS";cnxn.updateStatsForResponse(request.cxid, request.zxid, lastOp,request.createTime, Time.currentElapsedTime());zks.finishSessionInit(request.cnxn, true);return;}case OpCode.multi: {lastOp = "MULT";rsp = new MultiResponse() ;for 
(ProcessTxnResult subTxnResult : rc.multiResult) {OpResult subResult ;switch (subTxnResult.type) {case OpCode.check:subResult = new CheckResult();break;case OpCode.create:subResult = new CreateResult(subTxnResult.path);break;case OpCode.create2:case OpCode.createContainer:subResult = new CreateResult(subTxnResult.path, subTxnResult.stat);break;case OpCode.delete:case OpCode.deleteContainer:subResult = new DeleteResult();break;case OpCode.setData:subResult = new SetDataResult(subTxnResult.stat);break;case OpCode.error:subResult = new ErrorResult(subTxnResult.err) ;break;default:throw new IOException("Invalid type of op");}((MultiResponse)rsp).add(subResult);}break;}case OpCode.create: {lastOp = "CREA";rsp = new CreateResponse(rc.path);err = Code.get(rc.err);break;}case OpCode.create2:case OpCode.createContainer: {lastOp = "CREA";rsp = new Create2Response(rc.path, rc.stat);err = Code.get(rc.err);break;}case OpCode.delete:case OpCode.deleteContainer: {lastOp = "DELE";err = Code.get(rc.err);break;}case OpCode.setData: {lastOp = "SETD";rsp = new SetDataResponse(rc.stat);err = Code.get(rc.err);break;} case OpCode.reconfig: {lastOp = "RECO"; rsp = new GetDataResponse(((QuorumZooKeeperServer)zks).self.getQuorumVerifier().toString().getBytes(), rc.stat);err = Code.get(rc.err);break;}case OpCode.setACL: {lastOp = "SETA";rsp = new SetACLResponse(rc.stat);err = Code.get(rc.err);break;}case OpCode.closeSession: {lastOp = "CLOS";err = Code.get(rc.err);break;}case OpCode.sync: {lastOp = "SYNC";SyncRequest syncRequest = new SyncRequest();ByteBufferInputStream.byteBuffer2Record(request.request,syncRequest);rsp = new SyncResponse(syncRequest.getPath());break;}case OpCode.check: {lastOp = "CHEC";rsp = new SetDataResponse(rc.stat);err = Code.get(rc.err);break;}case OpCode.exists: {lastOp = "EXIS";// TODO we need to figure out the security requirement for this!ExistsRequest existsRequest = new ExistsRequest();ByteBufferInputStream.byteBuffer2Record(request.request,existsRequest);String 
path = existsRequest.getPath();if (path.indexOf('\0') != -1) {throw new KeeperException.BadArgumentsException();}Stat stat = zks.getZKDatabase().statNode(path, existsRequest.getWatch() ? cnxn : null);rsp = new ExistsResponse(stat);break;}case OpCode.getData: {lastOp = "GETD";GetDataRequest getDataRequest = new GetDataRequest();ByteBufferInputStream.byteBuffer2Record(request.request,getDataRequest);DataNode n = zks.getZKDatabase().getNode(getDataRequest.getPath());if (n == null) {throw new KeeperException.NoNodeException();}Long aclL;synchronized(n) {aclL = n.acl;}PrepRequestProcessor.checkACL(zks, zks.getZKDatabase().convertLong(aclL),ZooDefs.Perms.READ,request.authInfo);Stat stat = new Stat();byte b[] = zks.getZKDatabase().getData(getDataRequest.getPath(), stat,getDataRequest.getWatch() ? cnxn : null);rsp = new GetDataResponse(b, stat);break;}case OpCode.setWatches: {lastOp = "SETW";SetWatches setWatches = new SetWatches();// XXX We really should NOT need this!!!! request.request.rewind();ByteBufferInputStream.byteBuffer2Record(request.request, setWatches);long relativeZxid = setWatches.getRelativeZxid();zks.getZKDatabase().setWatches(relativeZxid,setWatches.getDataWatches(),setWatches.getExistWatches(),setWatches.getChildWatches(), cnxn);break;}case OpCode.getACL: {lastOp = "GETA";GetACLRequest getACLRequest = new GetACLRequest();ByteBufferInputStream.byteBuffer2Record(request.request,getACLRequest);Stat stat = new Stat();List<ACL> acl =zks.getZKDatabase().getACL(getACLRequest.getPath(), stat);rsp = new GetACLResponse(acl, stat);break;}case OpCode.getChildren: {lastOp = "GETC";GetChildrenRequest getChildrenRequest = new GetChildrenRequest();ByteBufferInputStream.byteBuffer2Record(request.request,getChildrenRequest);DataNode n = zks.getZKDatabase().getNode(getChildrenRequest.getPath());if (n == null) {throw new KeeperException.NoNodeException();}Long aclG;synchronized(n) {aclG = n.acl;}PrepRequestProcessor.checkACL(zks, 
zks.getZKDatabase().convertLong(aclG),ZooDefs.Perms.READ,request.authInfo);List<String> children = zks.getZKDatabase().getChildren(getChildrenRequest.getPath(), null, getChildrenRequest.getWatch() ? cnxn : null);rsp = new GetChildrenResponse(children);break;}case OpCode.getChildren2: {lastOp = "GETC";GetChildren2Request getChildren2Request = new GetChildren2Request();ByteBufferInputStream.byteBuffer2Record(request.request,getChildren2Request);Stat stat = new Stat();DataNode n = zks.getZKDatabase().getNode(getChildren2Request.getPath());if (n == null) {throw new KeeperException.NoNodeException();}Long aclG;synchronized(n) {aclG = n.acl;}PrepRequestProcessor.checkACL(zks, zks.getZKDatabase().convertLong(aclG),ZooDefs.Perms.READ,request.authInfo);List<String> children = zks.getZKDatabase().getChildren(getChildren2Request.getPath(), stat, getChildren2Request.getWatch() ? cnxn : null);rsp = new GetChildren2Response(children, stat);break;}case OpCode.checkWatches: {lastOp = "CHKW";CheckWatchesRequest checkWatches = new CheckWatchesRequest();ByteBufferInputStream.byteBuffer2Record(request.request,checkWatches);WatcherType type = WatcherType.fromInt(checkWatches.getType());boolean containsWatcher = zks.getZKDatabase().containsWatcher(checkWatches.getPath(), type, cnxn);if (!containsWatcher) {String msg = String.format(Locale.ENGLISH, "%s (type: %s)",new Object[] { checkWatches.getPath(), type });throw new KeeperException.NoWatcherException(msg);}break;}case OpCode.removeWatches: {lastOp = "REMW";RemoveWatchesRequest removeWatches = new RemoveWatchesRequest();ByteBufferInputStream.byteBuffer2Record(request.request,removeWatches);WatcherType type = WatcherType.fromInt(removeWatches.getType());boolean removed = zks.getZKDatabase().removeWatch(removeWatches.getPath(), type, cnxn);if (!removed) {String msg = String.format(Locale.ENGLISH, "%s (type: %s)",new Object[] { removeWatches.getPath(), type });throw new KeeperException.NoWatcherException(msg);}break;}}} catch 
(SessionMovedException e) {// session moved is a connection level error, we need to tear// down the connection otw ZOOKEEPER-710 might happen// ie client on slow follower starts to renew session, fails// before this completes, then tries the fast follower (leader)// and is successful, however the initial renew is then// successfully fwd/processed by the leader and as a result// the client and leader disagree on where the client is most// recently attached (and therefore invalid SESSION MOVED generated) cnxn.sendCloseSession();return;} catch (KeeperException e) {err = e.code();} catch (Exception e) {// log at error level as we are returning a marshalling// error to the userLOG.error("Failed to process " + request, e);StringBuilder sb = new StringBuilder();ByteBuffer bb = request.request;bb.rewind();while (bb.hasRemaining()) {sb.append(Integer.toHexString(bb.get() & 0xff));}LOG.error("Dumping request buffer: 0x" + sb.toString());err = Code.MARSHALLINGERROR;}long lastZxid = zks.getZKDatabase().getDataTreeLastProcessedZxid();ReplyHeader hdr =new ReplyHeader(request.cxid, lastZxid, err.intValue());zks.serverStats().updateLatency(request.createTime);cnxn.updateStatsForResponse(request.cxid, lastZxid, lastOp,request.createTime, Time.currentElapsedTime());try {cnxn.sendResponse(hdr, rsp, "response");if (request.type == OpCode.closeSession) {cnxn.sendCloseSession();}} catch (IOException e) {LOG.error("FIXMSG",e);}}? 第一步,根據共享的outstandingChanges,
先處理事務后處理session:
/**
 * Applies a transaction to the database and then updates session tracking.
 *
 * <p>The op code and session id come from {@code request} when it is
 * non-null, otherwise from {@code hdr}. When {@code hdr} is non-null the txn
 * is applied through {@code getZKDatabase().processTxn(hdr, txn)}; otherwise
 * an empty {@code ProcessTxnResult} is used. Afterwards:
 * <ul>
 *   <li>{@code OpCode.createSession}: a {@code CreateSessionTxn} accompanied
 *       by a header registers a global session; a local-session request
 *       registers a session whose timeout is read as the first int of the
 *       request buffer (the buffer is rewound before and after); any other
 *       combination is only logged as a warning;</li>
 *   <li>{@code OpCode.closeSession}: the session is removed from the
 *       tracker.</li>
 * </ul>
 * NOTE(review): the warning branch dereferences {@code txn} without a null
 * check — confirm callers never reach it with a null txn.
 *
 * @param request originating request, or null when only a header is available
 * @param hdr     transaction header, or null when there is no txn to apply
 * @param txn     transaction record paired with {@code hdr}
 * @return the database's result for the txn, or an empty result when
 *         {@code hdr} is null
 */
private ProcessTxnResult processTxn(Request request, TxnHeader hdr,Record txn) {ProcessTxnResult rc;int opCode = request != null ? request.type : hdr.getType();long sessionId = request != null ? request.sessionId : hdr.getClientId();if (hdr != null) {rc = getZKDatabase().processTxn(hdr, txn);} else {rc = new ProcessTxnResult();}if (opCode == OpCode.createSession) {if (hdr != null && txn instanceof CreateSessionTxn) {CreateSessionTxn cst = (CreateSessionTxn) txn;sessionTracker.addGlobalSession(sessionId, cst.getTimeOut());} else if (request != null && request.isLocalSession()) {request.request.rewind();int timeout = request.request.getInt();request.request.rewind();sessionTracker.addSession(request.sessionId, timeout);} else {LOG.warn("*****>>>>> Got "+ txn.getClass() + " "+ txn.toString());}} else if (opCode == OpCode.closeSession) {sessionTracker.removeSession(sessionId);}return rc;}處理事務,本地和數據庫的不同分支, DataTree創建節點
CreateTxn createTxn = (CreateTxn) txn;rc.path = createTxn.getPath();createNode(createTxn.getPath(),createTxn.getData(),createTxn.getAcl(),createTxn.getEphemeral() ? header.getClientId() : 0,createTxn.getParentCVersion(),header.getZxid(), header.getTime(), null);break;新增一個節點的邏輯是:
/**
 * Add a new node to the DataTree.
 *
 * <p>Inside a block synchronized on the parent node, the parent's cversion
 * and pzxid are updated, the child is added to the parent and to
 * {@code nodes}, and the path is recorded in {@code containers} or
 * {@code ephemerals} when applicable. After that block, quota bookkeeping is
 * updated and two watch events are triggered: {@code NodeCreated} on the new
 * path and {@code NodeChildrenChanged} on the parent ("/" when the parent
 * name is empty).
 *
 * @param path
 *            Path for the new node.
 * @param data
 *            Data to store in the node.
 * @param acl
 *            Node acls
 * @param ephemeralOwner
 *            the session id that owns this node. The body treats 0 as
 *            "not ephemeral" and {@code CONTAINER_EPHEMERAL_OWNER} as a
 *            container node; any other value is used as the owning
 *            session's key in {@code ephemerals}.
 * @param parentCVersion
 *            cversion to set on the parent; -1 means "parent's current
 *            cversion plus one".
 * @param zxid
 *            Transaction ID
 * @param time
 *            stored as both ctime and mtime of the new node
 * @param outputStat
 *            A Stat object to store Stat output results into; may be null.
 * @throws NodeExistsException
 *             when the parent already lists a child with this name
 * @throws NoNodeException
 *             when the parent node is not present in {@code nodes}
 * @throws KeeperException
 */
public void createNode(final String path, byte data[], List<ACL> acl,long ephemeralOwner, int parentCVersion, long zxid, long time, Stat outputStat)throws KeeperException.NoNodeException,KeeperException.NodeExistsException {int lastSlash = path.lastIndexOf('/');String parentName = path.substring(0, lastSlash);String childName = path.substring(lastSlash + 1);StatPersisted stat = new StatPersisted();stat.setCtime(time);stat.setMtime(time);stat.setCzxid(zxid);stat.setMzxid(zxid);stat.setPzxid(zxid);stat.setVersion(0);stat.setAversion(0);stat.setEphemeralOwner(ephemeralOwner);DataNode parent = nodes.get(parentName);if (parent == null) {throw new KeeperException.NoNodeException();}synchronized (parent) {Set<String> children = parent.getChildren();if (children != null && children.contains(childName)) {throw new KeeperException.NodeExistsException();}if (parentCVersion == -1) {parentCVersion = parent.stat.getCversion();parentCVersion++;}parent.stat.setCversion(parentCVersion);parent.stat.setPzxid(zxid);Long longval = convertAcls(acl);DataNode child = new DataNode(data, longval, stat);parent.addChild(childName);nodes.put(path, child);if (ephemeralOwner == CONTAINER_EPHEMERAL_OWNER) {containers.add(path);} else if (ephemeralOwner != 0) {HashSet<String> list = ephemerals.get(ephemeralOwner);if (list == null) {list = new HashSet<String>();ephemerals.put(ephemeralOwner, list);}synchronized (list) {list.add(path);}}if (outputStat != null) {child.copyStat(outputStat);}}// now check if its one of the zookeeper node childif 
(parentName.startsWith(quotaZookeeper)) {// now check if its the limit nodeif (Quotas.limitNode.equals(childName)) {// this is the limit node// get the parent and add it to the trie pTrie.addPath(parentName.substring(quotaZookeeper.length()));}if (Quotas.statNode.equals(childName)) {updateQuotaForPath(parentName.substring(quotaZookeeper.length()));}}// also check to update the quotas for this nodeString lastPrefix = getMaxPrefixWithQuota(path);if(lastPrefix != null) {// ok we have some match and need to updateupdateCount(lastPrefix, 1);updateBytes(lastPrefix, data == null ? 0 : data.length);} dataWatches.triggerWatch(path, Event.EventType.NodeCreated);childWatches.triggerWatch(parentName.equals("") ? "/" : parentName,Event.EventType.NodeChildrenChanged);}最后的邏輯是觸發創建節點和子節點改變事件。
/**
 * Fires and removes the watchers registered on a path.
 *
 * <p>Under this object's monitor, the path's watcher set is removed from
 * {@code watchTable} and the path is removed from each of those watchers'
 * entries in {@code watch2Paths}. Outside the monitor, every removed watcher
 * that is not contained in {@code supress} receives a {@code WatchedEvent}
 * built from {@code type}, {@code KeeperState.SyncConnected} and
 * {@code path}.
 *
 * @param path    the path whose watchers are fired
 * @param type    the event type to deliver
 * @param supress watchers to skip when delivering; may be null
 * @return the removed watcher set, including any suppressed watchers, or
 *         null when no watchers were registered for the path
 */
Set<Watcher> triggerWatch(String path, EventType type, Set<Watcher> supress) {WatchedEvent e = new WatchedEvent(type,KeeperState.SyncConnected, path);HashSet<Watcher> watchers;synchronized (this) {watchers = watchTable.remove(path);if (watchers == null || watchers.isEmpty()) {if (LOG.isTraceEnabled()) {ZooTrace.logTraceMessage(LOG,ZooTrace.EVENT_DELIVERY_TRACE_MASK,"No watchers for " + path);}return null;}for (Watcher w : watchers) {HashSet<String> paths = watch2Paths.get(w);if (paths != null) {paths.remove(path);}}}for (Watcher w : watchers) {if (supress != null && supress.contains(w)) {continue;} w.process(e);}return watchers;}WatcherManager調用定義的watcher進行事件處理。
1.3 再看異步消息處理器SyncRequestProcessor(在處理鏈中位于PrepRequestProcessor和FinalRequestProcessor之間)
/**
 * Main loop of the sync processor: logs requests and batches them for
 * flushing.
 *
 * <p>Behaviour visible in the body:
 * <ul>
 *   <li>Blocks on {@code queuedRequests.take()} while {@code toFlush} is
 *       empty; otherwise polls, and flushes {@code toFlush} when the queue
 *       has nothing ready.</li>
 *   <li>Exits the loop when {@code requestOfDeath} is dequeued.</li>
 *   <li>When {@code zks.getZKDatabase().append(si)} returns true, counts the
 *       record; once the count exceeds {@code snapCount / 2 + randRoll} it
 *       rolls the log, starts a "Snapshot Thread" calling
 *       {@code zks.takeSnapshot()} unless a previous one is still alive, and
 *       resets the count.</li>
 *   <li>When append returns false and nothing is waiting to be flushed, the
 *       request is passed straight to {@code nextProcessor} (and flushed if
 *       that processor is {@code Flushable}).</li>
 *   <li>Otherwise the request is added to {@code toFlush}, which is flushed
 *       once it holds more than 1000 entries.</li>
 * </ul>
 * Any {@code Throwable} is passed to {@code handleException};
 * {@code running} is set to false on exit.
 */
@Overridepublic void run() {try {int logCount = 0;// we do this in an attempt to ensure that not all of the servers// in the ensemble take a snapshot at the same timeint randRoll = r.nextInt(snapCount/2);while (true) {Request si = null;if (toFlush.isEmpty()) {si = queuedRequests.take();} else {si = queuedRequests.poll();if (si == null) {flush(toFlush);continue;}}if (si == requestOfDeath) {break;}if (si != null) {// track the number of records written to the logif (zks.getZKDatabase().append(si)) {logCount++;if (logCount > (snapCount / 2 + randRoll)) {randRoll = r.nextInt(snapCount/2);// roll the log zks.getZKDatabase().rollLog();// take a snapshotif (snapInProcess != null && snapInProcess.isAlive()) {LOG.warn("Too busy to snap, skipping");} else { snapInProcess = new ZooKeeperThread("Snapshot Thread") {public void run() {try {zks.takeSnapshot();} catch(Exception e) {LOG.warn("Unexpected exception", e);}}};snapInProcess.start();}logCount = 0;}} else if (toFlush.isEmpty()) {// optimization for read heavy workloads// iff this is a read, and there are no pending// flushes (writes), then just pass this to the next// processorif (nextProcessor != null) {nextProcessor.processRequest(si);if (nextProcessor instanceof Flushable) {((Flushable)nextProcessor).flush();}}continue;}toFlush.add(si);if (toFlush.size() > 1000) {flush(toFlush);}}}} catch (Throwable t) {handleException(this.getName(), t);} finally{running = false;}LOG.info("SyncRequestProcessor exited!");}? 異步處理日志和快照,啟動ZooKeeperThread線程來生成快照。
/**
 * Writes a snapshot of the current data tree and session timeouts by calling
 * {@code txnLogFactory.save}.
 *
 * <p>An {@code IOException} is treated as unrecoverable: it is logged at
 * error level and the JVM is terminated with {@code System.exit(10)}.
 */
public void takeSnapshot(){try {txnLogFactory.save(zkDb.getDataTree(), zkDb.getSessionWithTimeOuts());} catch (IOException e) {LOG.error("Severe unrecoverable error, exiting", e);// This is a severe error that we cannot recover from,// so we need to exitSystem.exit(10);}}FileTxnSnapLog是個工具類,幫助處理txtlog和snapshot。
/**
 * Save the datatree and the sessions into a snapshot.
 *
 * <p>The snapshot file is created under {@code snapDir} with a name derived
 * from {@code dataTree.lastProcessedZxid} via {@code Util.makeSnapshotName};
 * the actual writing is delegated to {@code snapLog.serialize}.
 *
 * @param dataTree the datatree to be serialized onto disk
 * @param sessionsWithTimeouts the session timeouts to be
 * serialized onto disk
 * @throws IOException declared by the signature; not caught in this body
 */
public void save(DataTree dataTree,ConcurrentHashMap<Long, Integer> sessionsWithTimeouts)throws IOException {long lastZxid = dataTree.lastProcessedZxid;File snapshotFile = new File(snapDir, Util.makeSnapshotName(lastZxid));LOG.info("Snapshotting: 0x{} to {}", Long.toHexString(lastZxid),snapshotFile); snapLog.serialize(dataTree, sessionsWithTimeouts, snapshotFile); }持久化為文件
/**
 * Serialize the datatree and session into the file snapshot.
 *
 * <p>Does nothing when {@code close} is set. Otherwise the data is written
 * through a buffered file stream wrapped in an Adler32
 * {@code CheckedOutputStream}: first the header, tree and sessions (via the
 * overloaded {@code serialize}), then the checksum value as a long tagged
 * "val", then the string "/" tagged "path".
 *
 * <p>NOTE(review): the streams are closed only on the success path; if
 * serialization throws, the file stream appears to stay open — consider
 * try/finally or try-with-resources.
 *
 * @param dt the datatree to be serialized
 * @param sessions the sessions to be serialized
 * @param snapShot the file to store snapshot into
 * @throws IOException declared by the signature; not caught in this body
 */
public synchronized void serialize(DataTree dt, Map<Long, Integer> sessions, File snapShot)throws IOException {if (!close) {OutputStream sessOS = new BufferedOutputStream(new FileOutputStream(snapShot));CheckedOutputStream crcOut = new CheckedOutputStream(sessOS, new Adler32());//CheckedOutputStream cout = new CheckedOutputStream()OutputArchive oa = BinaryOutputArchive.getArchive(crcOut);FileHeader header = new FileHeader(SNAP_MAGIC, VERSION, dbId);serialize(dt,sessions,oa, header);long val = crcOut.getChecksum().getValue();oa.writeLong(val, "val");oa.writeString("/", "path");sessOS.flush();crcOut.close();sessOS.close();}}至此,整個流程已經走完。
2. 集群情況下
集群情況和單機略有不同,集群中使用QuorumPeer來啟動ServerCnxnFactory,綁定本地地址
/**
 * Logs {@code localAddress} and binds {@code bootstrap} to it, keeping the
 * result of {@code bind} in {@code parentChannel}.
 */
@Overridepublic void start() {LOG.info("binding to port " + localAddress);parentChannel = bootstrap.bind(localAddress);}限于篇幅,后面的邏輯將在下篇中詳細描述。
小結
從上面的代碼流程中,我們可以看出服務器要么通過NIO、要么通過Netty框架來接收請求。請求先由PrepRequestProcessor接收并進行包裝,然后根據請求類型的不同設置共享數據;接著通過SyncRequestProcessor來寫入事務日志并生成快照,在日志沒有寫入前不會交給下一個消息處理器;最后調用FinalRequestProcessor來作為消息處理器的終結者,根據命令類型改變db的內容,發送響應消息,并觸發watcher的處理程序。
轉載于:https://www.cnblogs.com/davidwang456/p/5001244.html
總結
以上是生活随笔為你收集整理的zookeeper源码分析之四服务端(单机)处理请求流程的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: zookeeper源码分析之三客户端发送
- 下一篇: 一次上线事故经验