/* Glass - a distributed computing library
   Copyright (C) 2003-2009 Bruno Barberi Gnecco
   Copyright (C) 2009-2010 Corollarium Technologies


   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#ifndef _GLASS_PLUGIN_BARRIER_HH_
#define _GLASS_PLUGIN_BARRIER_HH_

#include <map>
#include "plugin.h"
#include "exception.h"
#include <boost/thread.hpp>

namespace libglass {

// Barrier states: inactive, active, locked, disabled
// ==================================================
// Barriers are tricky, because they deadlock very easily...
// Suppose you have a program that consists of 2 barriers:
//
//   Barrier *a, *b;
//
//   a = new Barrier();
//   b = new Barrier();
//
//   while (1) {
//     ...
//     a->sync();
// *   ...
//     b->sync();
//     ...
//   }
//
// Let's assume the first node starts and enters the loop before a second node joins:
//
// Consider the case where the barriers had only two states (locked/unlocked)...
// If node 2 joined when node 1 was at * then a deadlock would occur, as node 1
// would call b->sync(), and therefore wait for node 2 to call b->sync(), while
// node 2 would call a->sync(), and wait for node 1 to call a->sync().
//
// To overcome this problem Bruno and Paiva have designed a system which
// uses three barrier states: active/inactive/locked...
// When a barrier is created (with new), it is set to the inactive state by default.
// This means: if other nodes synchronize this barrier, they will not consider this node.
// It's almost as if the barrier is disabled when it is in the inactive state.
// When the first barrier sync() is called (let's say for barrier a), a is set to locked,
// while all other barriers are still inactive. Barriers only get switched to the active
// state when their node calls sync() for the first time. Once in the active state,
// it's a locked/unlocked state machine (or more precisely locked/active using our current
// terminology).
// The advantage of this system is that it allows nodes to join the computation at any time
// and synchronize automatically, avoiding deadlocks.
//
// A fourth barrier state has also been defined - disabled. Currently, however, this state
// is not implemented.
//
// Multithreaded nodes and Barriers...
// ===================================
// There's a conceptual problem with Barrier and multithreaded nodes that remains unsolved...
//
// Suppose you have a Barrier b, which is global. How can Glass know which of the threads
// running on a node are going to synchronize this barrier? The idea of giving each thread a
// different Glass instance is not appealing!
00079 // Right now what happens is: if two threads running on the same node synchronize the same 00080 // barrier, Glass cannot distinguish this from the situation where one thread synchronises the 00081 // barrier twice. 00082 // Symptoms of this problem may explain the behaviour of the testthread1 application which is 00083 // currently not working well - often the nodes are left in a deadlocked state as some threads 00084 // are waiting on a barrier whilst other threads have destroyed it. 00085 // Further problems may be caused by concurrent access to data. I don't know how thread safe 00086 // the STL is, and I didn't want to clutter the code with mutexes. If there's a race condition, 00087 // it almost certainly can be solved by a few mutexes in the Barrier class methods. 00088 00089 00090 00091 class Barrier; 00092 00119 class BarrierBase : public PluginBase { 00123 PLUGIN(BarrierBase); 00124 00125 private: 00126 typedef enum { 00127 Active, 00128 Inactive, 00129 Locked, 00130 Disabled 00131 } BarrierState; 00132 00133 /* --- server --- */ 00134 00135 // Example servermap (2 barrier groups, 2 barrier numbers, 3 nodes)... 00136 // GroupNum BarrierNum NodeId State 00137 // -------------------------------------------------- 00138 // 0 0 0 Inactive 00139 // 0 0 1 Inactive 00140 // 0 0 2 Inactive 00141 // -------------------------------------------------- 00142 // 0 1 0 Inactive 00143 // 0 1 1 Inactive 00144 // 0 1 2 Inactive 00145 // -------------------------------------------------- 00146 // 1 0 0 Inactive 00147 // 1 0 1 Inactive 00148 // 1 0 2 Inactive 00149 // -------------------------------------------------- 00150 // 1 1 0 Inactive 00151 // 1 1 1 Inactive 00152 // 1 1 2 Inactive 00153 std::map<unsigned int, /* group */ 00154 std::map<unsigned int, /* barrier num */ 00155 std::map<nodeId, BarrierState> > > 00156 servermap; 00157 00158 // Example activemap (3 nodes)... 
00159 // NodeId Active 00160 // -------------------- 00161 // 0 false 00162 // 1 false 00163 // 2 false 00164 std::map<nodeId, bool> activemap; 00165 00166 /* --- client --- */ 00167 00168 // Example clientmap (2 barrier groups, 2 barrier numbers)... 00169 // GroupNum BarrierNum Barrier* 00170 // ------------------------------------- 00171 // 0 0 ptr1 00172 // 0 1 ptr2 00173 // ------------------------------------- 00174 // 1 0 ptr3 00175 // 1 1 ptr4 00176 // ------------------------------------- 00177 std::map<unsigned int, /* group */ 00178 std::map<unsigned int, /* number */ 00179 Barrier *> > 00180 clientmap; 00181 00184 const char *state(BarrierState s); 00185 00199 bool checkUpdate(unsigned int barriergroup, unsigned int barriernum); 00200 00201 protected: 00202 friend class Barrier; 00203 00204 00215 bool registerBarrier(Barrier *b); 00216 00227 bool unregisterBarrier(Barrier *b); 00228 00229 00230 // ********************************************************************* 00231 // Destructor should be PROTECTED to ensure singleton behaviour 00232 // However, Microsoft compilers prior to VC v7.0 .NET are buggy and 00233 // generate the following error 00234 // error C2248: 'BarrierBase::~BarrierBase' : cannot access protected member declared in class 'libglass::BarrierBase' 00235 // The only workaround is to make the destructor PUBLIC - which kind 00236 // of destroys the whole reason for implementing the singleton class 00237 #if defined(_MSC_VER) && (_MSC_VER < 1300) // earlier than .NET compiler (VC 7.0) 00238 public: 00239 #else 00240 protected: 00241 #endif 00242 // ********************************************************************* 00246 ~BarrierBase(); 00247 00248 public: 00249 }; 00250 00258 class LIBGLASS_API Barrier : public PluginInterface { 00259 private: 00260 BarrierBase *bb; 00262 unsigned int number; 00263 unsigned int group; 00270 struct _syncdata { 00271 bool lock; 00272 bool retval; 00273 boost::condition_variable t; 00274 boost::mutex 
t_mut; 00276 boost::mutex mutex; 00278 _syncdata() : lock(false), retval(true) { } 00279 } syncdata; 00280 00281 protected: 00282 friend class BarrierBase; 00283 00284 public: 00292 Barrier(unsigned int num, unsigned int group = 0, 00293 bool active = false) throw(Exception); 00294 00298 ~Barrier(); 00299 00304 unsigned int getNumber(void) const { return this->number; } 00305 00310 unsigned int getGroup(void) const { return this->group; } 00311 00323 bool sync(void); 00324 00332 bool disable(void); 00333 00341 bool enable(void); 00342 }; 00343 00344 }//namespace 00345 00346 00347 #endif // _GLASS_PLUGIN_BARRIER_HH_