00001 00032 #include <errno.h> 00033 #include <signal.h> 00034 00035 #include "linden_common.h" 00036 #include "llapp.h" 00037 00038 #include "llheartbeat.h" 00039 00040 LLHeartbeat::LLHeartbeat(F32 secs_between_heartbeat, 00041 F32 aggressive_heartbeat_panic_secs, 00042 F32 aggressive_heartbeat_max_blocking_secs) 00043 : mSecsBetweenHeartbeat(secs_between_heartbeat), 00044 mAggressiveHeartbeatPanicSecs(aggressive_heartbeat_panic_secs), 00045 mAggressiveHeartbeatMaxBlockingSecs(aggressive_heartbeat_max_blocking_secs), 00046 mSuppressed(false) 00047 { 00048 mBeatTimer.reset(); 00049 mBeatTimer.setTimerExpirySec(mSecsBetweenHeartbeat); 00050 mPanicTimer.reset(); 00051 mPanicTimer.setTimerExpirySec(mAggressiveHeartbeatPanicSecs); 00052 } 00053 00054 LLHeartbeat::~LLHeartbeat() 00055 { 00056 // do nothing. 00057 } 00058 00059 void 00060 LLHeartbeat::setSuppressed(bool is_suppressed) 00061 { 00062 mSuppressed = is_suppressed; 00063 } 00064 00065 // returns 0 on success, -1 on permanent failure, 1 on temporary failure 00066 int 00067 LLHeartbeat::rawSend() 00068 { 00069 #if LL_WINDOWS 00070 return 0; // Pretend we succeeded. 00071 #else 00072 if (mSuppressed) 00073 return 0; // Pretend we succeeded. 00074 00075 union sigval dummy; 00076 int result = sigqueue(getppid(), LL_HEARTBEAT_SIGNAL, dummy); 00077 if (result == 0) 00078 return 0; // success 00079 00080 int err = errno; 00081 if (err == EAGAIN) 00082 return 1; // failed to queue, try again 00083 00084 return -1; // other failure. 00085 #endif 00086 } 00087 00088 int 00089 LLHeartbeat::rawSendWithTimeout(F32 timeout_sec) 00090 { 00091 int result = 0; 00092 00093 // Spin tightly until our heartbeat is digested by the watchdog 00094 // or we time-out. We don't really want to sleep because our 00095 // wake-up time might be undesirably synchronised to a hidden 00096 // clock by the system's scheduler. 00097 mTimeoutTimer.reset(); 00098 mTimeoutTimer.setTimerExpirySec(timeout_sec); 00099 do { 00100 result = rawSend(); 00101 //llinfos << " HEARTSENDc=" << result << llendl; 00102 } while (result==1 && !mTimeoutTimer.hasExpired()); 00103 00104 return result; 00105 } 00106 00107 bool 00108 LLHeartbeat::send(F32 timeout_sec) 00109 { 00110 bool total_success = false; 00111 int result = 1; 00112 00113 if (timeout_sec > 0.f) { 00114 // force a spin until success or timeout 00115 result = rawSendWithTimeout(timeout_sec); 00116 } else { 00117 if (mBeatTimer.hasExpired()) { 00118 // zero-timeout; we don't care too much whether our 00119 // heartbeat was digested. 00120 result = rawSend(); 00121 //llinfos << " HEARTSENDb=" << result << llendl; 00122 } 00123 } 00124 00125 if (result == -1) { 00126 // big failure. 00127 } else if (result == 0) { 00128 total_success = true; 00129 } else { 00130 // need to retry at some point 00131 } 00132 00133 if (total_success) { 00134 mBeatTimer.reset(); 00135 mBeatTimer.setTimerExpirySec(mSecsBetweenHeartbeat); 00136 // reset the time until we start panicking about lost 00137 // heartbeats again. 00138 mPanicTimer.reset(); 00139 mPanicTimer.setTimerExpirySec(mAggressiveHeartbeatPanicSecs); 00140 } else { 00141 // leave mBeatTimer as expired so we'll lazily poke the 00142 // watchdog again next time through. 00143 } 00144 00145 if (mPanicTimer.hasExpired()) { 00146 // It's been ages since we successfully had a heartbeat 00147 // digested by the watchdog. Sit here and spin a while 00148 // in the hope that we can force it through. 00149 llwarns << "Unable to deliver heartbeat to launcher for " << mPanicTimer.getElapsedTimeF32() << " seconds. Going to try very hard for up to " << mAggressiveHeartbeatMaxBlockingSecs << " seconds." << llendl; 00150 result = rawSendWithTimeout(mAggressiveHeartbeatMaxBlockingSecs); 00151 if (result == 0) { 00152 total_success = true; 00153 } else { 00154 // we couldn't even force it through. That's bad, 00155 // but we'll try again in a while. 00156 llwarns << "Could not deliver heartbeat to launcher even after trying very hard for " << mAggressiveHeartbeatMaxBlockingSecs << " seconds." << llendl; 00157 } 00158 00159 // in any case, reset the panic timer. 00160 mPanicTimer.reset(); 00161 mPanicTimer.setTimerExpirySec(mAggressiveHeartbeatPanicSecs); 00162 } 00163 00164 return total_success; 00165 }