@@ -130,7 +130,11 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata)
     orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
     orte_topology_t *t;
     orte_node_t *node;
-    int i;
+    int i, rc;
+    uint8_t u8;
+    opal_buffer_t buf;
+    orte_grpcomm_signature_t *sig;
+    orte_daemon_cmd_flag_t command = ORTE_DAEMON_PASS_NODE_INFO_CMD;
 
     ORTE_ACQUIRE_OBJECT(caddy);
 
@@ -177,6 +181,78 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata)
     /* ensure we update the routing plan */
     orte_routed.update_routing_plan(NULL);
 
+    /* prep the buffer */
+    OBJ_CONSTRUCT(&buf, opal_buffer_t);
+    /* load the command */
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &command, 1, ORTE_DAEMON_CMD))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_DESTRUCT(&buf);
+        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
+        OBJ_RELEASE(caddy);
+        return;
+    }
+
+
+    /* if we did not execute a tree-spawn, then the daemons do
+     * not currently have a nidmap for the job - in that case,
+     * send one to them */
+    if (!orte_nidmap_communicated) {
+        u8 = 1;
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &u8, 1, OPAL_UINT8))) {
+            ORTE_ERROR_LOG(rc);
+            OBJ_DESTRUCT(&buf);
+            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
+            OBJ_RELEASE(caddy);
+            return;
+        }
+        if (OPAL_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, &buf))) {
+            ORTE_ERROR_LOG(rc);
+            OBJ_DESTRUCT(&buf);
+            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
+            OBJ_RELEASE(caddy);
+            return;
+        }
+        orte_nidmap_communicated = true;
+    } else {
+        u8 = 0;
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &u8, 1, OPAL_UINT8))) {
+            ORTE_ERROR_LOG(rc);
+            OBJ_DESTRUCT(&buf);
+            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
+            OBJ_RELEASE(caddy);
+            return;
+        }
+    }
+
+    /* we always send the topologies and the #slots on each node. Note
+     * that we cannot send the #slots until after the above step since,
+     * for unmanaged allocations, we might have just determined it! */
+    if (OPAL_SUCCESS != (rc = orte_util_pass_node_info(&buf))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_DESTRUCT(&buf);
+        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
+        OBJ_RELEASE(caddy);
+        return;
+    }
+
+    /* goes to all daemons */
+    sig = OBJ_NEW(orte_grpcomm_signature_t);
+    sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
+    sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
+    sig->signature[0].vpid = ORTE_VPID_WILDCARD;
+    sig->sz = 1;
+    if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, &buf))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(sig);
+        OBJ_DESTRUCT(&buf);
+        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
+        OBJ_RELEASE(caddy);
+        return;
+    }
+    OBJ_DESTRUCT(&buf);
+    /* maintain accounting */
+    OBJ_RELEASE(sig);
+
     /* progress the job */
     caddy->jdata->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
     ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_VM_READY);
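
For context on how this message is consumed, here is a minimal sketch of the daemon-side handler. It is not part of this diff: it assumes the usual conventions of the command switch in orted_comm.c (locals buffer, ret, n, flag, and a CLEANUP label), and it assumes orte_util_decode_nidmap() and orte_util_parse_node_info() exist as the inverses of orte_util_nidmap_create() and orte_util_pass_node_info(). The unpack order mirrors the pack order above: the nidmap flag first (the command itself is assumed to be unpacked before dispatch), then the node info.

    /* Sketch only: hypothetical handler arm for the command packed above,
     * modeled on the command switch in orted_comm.c. The command flag
     * itself is assumed to have been unpacked before dispatch. */
    case ORTE_DAEMON_PASS_NODE_INFO_CMD:
        /* unpack the flag that says whether a nidmap follows */
        n = 1;
        if (OPAL_SUCCESS != (ret = opal_dss.unpack(buffer, &flag, &n, OPAL_UINT8))) {
            ORTE_ERROR_LOG(ret);
            goto CLEANUP;
        }
        if (0 != flag) {
            /* assumed inverse of orte_util_nidmap_create() */
            if (ORTE_SUCCESS != (ret = orte_util_decode_nidmap(buffer))) {
                ORTE_ERROR_LOG(ret);
                goto CLEANUP;
            }
        }
        /* assumed inverse of orte_util_pass_node_info(): topologies and #slots */
        if (ORTE_SUCCESS != (ret = orte_util_parse_node_info(buffer))) {
            ORTE_ERROR_LOG(ret);
            goto CLEANUP;
        }
        break;

The xcast signature built in the diff (the daemon job's jobid with ORTE_VPID_WILDCARD) is what routes this buffer to every daemon, so the same handler runs on each of them.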