The VisAO Camera
sysmonD.cpp
Go to the documentation of this file.
1 /************************************************************
2 * sysmonD.cpp
3 *
4 * Author: Jared R. Males (jrmales@email.arizona.edu)
5 *
6 * Definitions for the sysmonD system monitor
7 *
8 * Developed as part of the Magellan Adaptive Optics system.
9 ************************************************************/
10 
11 /** \file sysmonD.cpp
12  * \author Jared R. Males
13  * \brief Definitions for the sysmonD system monitor
14  *
15 */
16 
17 #include "sysmonD.h"
18 #include <setjmp.h>
19 
20 //extern int qtitimeout;
21 
22 namespace VisAO
23 {
24 
25 sysmonD::sysmonD(int argc, char **argv) throw (AOException) : VisAOApp_standalone(argc, argv)
26 {
27  init_sysmonD();
28 }
29 
30 sysmonD::sysmonD(std::string name, const std::string &conffile) throw (AOException) : VisAOApp_standalone(name, conffile)
31 {
32  init_sysmonD();
33 }
34 
/// Initialization shared by both constructors.
/** Reads the optional QTI probe serial numbers and the mandatory warning/limit
  * thresholds from ConfigDictionary(), fills in the status board header and
  * obtains the temperature logger.
  *
  * A missing probe serial number only clears the matching haveQTI* flag.
  * A missing threshold is logged at LOG_LEV_FATAL and the Config_File_Exception
  * is rethrown to the caller.
  *
  * NOTE(review): this listing skips embedded lines 80, 82 and 87, so the
  * condition guarding the "Could not create status board." branch and the
  * declaration of bsb are not visible here.
  */
35 void sysmonD::init_sysmonD()
36 {
37  //Get the QTI Air Temp Probe serial number
 //Each of the three probe look-ups below is optional: a missing key is logged at INFO level, not treated as an error.
38  try
39  {
40  qtiAirSerialNo = (std::string)(ConfigDictionary())["airTemp_serial_no"];
41  haveQTIAir = true;
42  _logger->log(Logger::LOG_LEV_INFO, "Set air temp probe ser. no. to: %s", qtiAirSerialNo.c_str());
43  }
44  catch(Config_File_Exception)
45  {
46  haveQTIAir = false;
47  _logger->log(Logger::LOG_LEV_INFO, "Air temp probe ser. no. not found");
48  }
49 
50  //Get the QTI Joe 47 Temp Probe serial number
51  try
52  {
53  qtiJoe47SerialNo = (std::string)(ConfigDictionary())["joe47Temp_serial_no"];
54  haveQTIJoe47 = true;
55  _logger->log(Logger::LOG_LEV_INFO, "Set joe 47 temp probe ser. no. to: %s", qtiJoe47SerialNo.c_str());
56  }
57  catch(Config_File_Exception)
58  {
59  haveQTIJoe47 = false;
60  _logger->log(Logger::LOG_LEV_INFO, "Joe 47 temp probe ser. no. not found");
61  }
62 
63  //Get the QTI Ambient Temp Probe serial number
64  try
65  {
66  qtiAmbSerialNo = (std::string)(ConfigDictionary())["ambTemp_serial_no"];
67  haveQTIAmb = true;
68  _logger->log(Logger::LOG_LEV_INFO, "Set ambient temp probe ser. no. to: %s", qtiAmbSerialNo.c_str());
69  }
70  catch(Config_File_Exception)
71  {
72  haveQTIAmb = false;
73  _logger->log(Logger::LOG_LEV_INFO, "Ambient temp probe ser. no. not found");
74  }
75 
 //Number of /dev/ttyACM<i> devices that setupQTI() will query, fixed regardless of how many serial numbers were configured.
76  nprobes = 3;
77 
78  //Init the status board
79  statusboard_shmemkey = STATUS_sysmonD;
 //NOTE(review): the condition that selects between the two branches below is missing from this listing.
81  {
83  _logger->log(Logger::LOG_LEV_ERROR, "Could not create status board.");
84  }
85  else
86  {
 //NOTE(review): strncpy does not null-terminate when the source has 25 or more characters; the size of bsb->appname is not visible here - confirm.
88  strncpy(bsb->appname, MyFullName().c_str(), 25);
89  bsb->max_update_interval = 5.*pause_time;
90  }
91 
92  _tempsLogger = Logger::get("VisAO Temperature", Logger::LOG_LEV_INFO, "TELEMETRY");
93 
94  //Read config data.
 //Unlike the probe serial numbers, all twelve thresholds are mandatory: the first missing key aborts initialization.
95  try
96  {
97  core_temp_warn = (double)(ConfigDictionary())["core_temp_warn"];
98  core_temp_limit = (double)(ConfigDictionary())["core_temp_limit"];
99  hdd_used_warn = (double)(ConfigDictionary())["hdd_used_warn"];
100  hdd_used_limit = (double)(ConfigDictionary())["hdd_used_limit"];
101  hdd_temp_warn = (double)(ConfigDictionary())["hdd_temp_warn"];
102  hdd_temp_limit = (double)(ConfigDictionary())["hdd_temp_limit"];
103  gpu_temp_warn = (double)(ConfigDictionary())["gpu_temp_warn"];
104  gpu_temp_limit = (double)(ConfigDictionary())["gpu_temp_limit"];
105  air_temp_warn = (double)(ConfigDictionary())["air_temp_warn"];
106  air_temp_limit = (double)(ConfigDictionary())["air_temp_limit"];
107  joe_temp_warn = (double)(ConfigDictionary())["joe_temp_warn"];
108  joe_temp_limit = (double)(ConfigDictionary())["joe_temp_limit"];
109  }
110  catch(Config_File_Exception)
111  {
112  _logger->log(Logger::LOG_LEV_FATAL, "Missing warning and limit data");
 //Rethrow the exception currently being handled so the constructor fails.
113  throw;
114  }
115 
116 
117 }
118 
119 int sysmonD::get_sysstat()
120 {
121  double cpun, user, nice, sys, iowait, irq, soft, steal, intrs;
122 
123  std::string com;
124  com = "sh ";
125  com += getenv("VISAO_ROOT");
126  com += "/bin/get_sysstat.sh";
127  //system("sh ./get_sysstat.sh");
128  //std::cout << com << "\n";
129 
130  system(com.c_str());
131 
132  fin.open("sysstat.txt");
133 
134  for(int i=0; i<SYS_N_CORES; i++)
135  {
136  fin >> core_temps[i];
137  fin >> core_max[i];
138  }
139 
140  fin >> mem_tot;
141  fin >> mem_used;
142  fin >> mem_free;
143  fin >> mem_shared;
144  fin >> mem_buff;
145  fin >> mem_cached;
146  fin >> mem_buff;
147  fin >> mem_free;
148  std::cout << "mem_free: " << mem_free << "\n";
149 
150  fin >> swap_tot;
151  fin >> swap_used;
152  fin >> swap_free;
153 
154  fin >> dfroot_size;
155  fin >> dfroot_used;
156  fin >> dfroot_avail;
157 
158  for(int i=0; i<SYS_N_VCORES; i++)
159  {
160  fin >> cpun;
161  fin >> user;
162  fin >> nice;
163  fin >> sys;
164  fin >> iowait;
165  fin >> irq;
166  fin >> soft;
167  fin >> steal;
168  fin >> core_idle[i];
169  fin >> intrs;
170  }
171 
172  fin.close();
173 
174  visao_mdstat(raid_stat);
175 
176  return 0;
177 }
178 
179 int sysmonD::get_GPUstat()
180 {
181  std::string tmp;
182  system("nvidia-smi | grep Default > GPUstat.txt");
183 
184  fin.open("GPUstat.txt");
185 
186  fin >> tmp;
187 
188  fin >> tmp;
189 
190  fin >> GPUTemp;
191 
192  for(int i=0;i<6; i++) fin >> tmp;
193 
194  fin >> GPUMemUsage;
195 
196  fin.close();
197 
198  return 0;
199 
200 }
201 
202 int sysmonD::get_HDDstat()
203 {
204  std::string tmp;
205 
206  system("/usr/sbin/smartctl /dev/sda --all | grep Temperature_Celsius > HDDstat.txt");
207 
208  fin.open("HDDstat.txt");
209 
210  for(int i=0; i < 10;i++) fin >> tmp;
211 
212  fin.close();
213 
214 
215  HDDTemp_a = atoi(tmp.c_str());
216  std::cout << HDDTemp_a << "\n";
217 
218  system("/usr/sbin/smartctl /dev/sdb --all | grep Temperature_Celsius > HDDstat.txt");
219  fin.open("HDDstat.txt");
220  for(int i=0; i < 10;i++) fin >> tmp;
221  fin.close();
222 
223  HDDTemp_b = atoi(tmp.c_str());
224 
225  return 0;
226 }
227 
228 sysmonD * global_sysmonD;
229 
230 static void timeout_handler(int sig, siginfo_t *si, void *uc)
231 {
232  try{
233  global_sysmonD->qtiTimeout();
234  }
235  catch(...)
236  {
237  std::cout << "caught in handler\n";
238  }
239  std::cout << "Sighandler returning" << std::endl;
240 }
241 
242 
243 
244 int sysmonD::setupQTI()
245 {
246  timer_t timerid;
247  struct sigevent sev;
248  struct itimerspec its, itsdisarm;
249  long long freq_nanosecs;
250 
251  struct sigaction sa;
252 
253  global_sysmonD = this;
254 
255  sa.sa_flags = SA_SIGINFO;
256  sa.sa_sigaction = timeout_handler;
257  sigemptyset(&sa.sa_mask);
258  sigaction(RTSIGTIMEOUT, &sa, NULL);
259 
260 
261  sev.sigev_notify = SIGEV_SIGNAL;
262  sev.sigev_signo = RTSIGTIMEOUT;
263  sev.sigev_value.sival_ptr = &timerid;
264 
265 
266  freq_nanosecs = (long long) 2.*1e9;
267  its.it_value.tv_sec = freq_nanosecs/1000000000;
268  its.it_value.tv_nsec = freq_nanosecs % 1000000000;
269  its.it_interval.tv_sec = its.it_value.tv_sec;
270  its.it_interval.tv_nsec = its.it_value.tv_nsec;
271 
272  itsdisarm.it_value.tv_sec = 0;
273  itsdisarm.it_value.tv_nsec = 0;
274  itsdisarm.it_interval.tv_sec = 0;
275  itsdisarm.it_interval.tv_nsec = 0;
276 
277 
278 
279  QTITempProbe qti;
280 
281  qtiProbes = new QTITempProbe[nprobes];
282 
283  char devpath[25];
284 
285  set_euid_called();
286 
287  timer_create(CLOCK_REALTIME, &sev, &timerid);
288 
289  for(int i=0; i < nprobes; i++)
290  {
291  snprintf(devpath,25, "/dev/ttyACM%i", i);
292  std::cout << "Reading: " << devpath << "\n";
293  qtiProbes[i].setDevPath(devpath);
294  //std::cout << "1" << std::endl;
295 
296 
297  timer_settime(timerid, 0, &its, 0);
298  qtiProbes[i].readSerialNumber();
299  timer_settime(timerid, 0, &itsdisarm, 0);
300  //timer_delete(timerid);
301  //std::cout << "timer deleted\n";
302 
303 // if(qtitimeout)
304 // {
305 // std::cout << "returning\n";
306 // return -1;
307 //
308 // //sigaction(RTSIGTIMEOUT, &sa, NULL);
309 // //longjmp(env, 1);
310 // }
311 
312  std::cout << "Got: " << qtiProbes[i].getSerialNumber() << "\n";
313 
314 
315  }
316 
317  set_euid_real();
318 
319  if(haveQTIAir)
320  {
321  qtiAirIndex = -1;
322  for(int i=0; i<nprobes; i++)
323  {
324  if(qtiProbes[i].getSerialNumber() == qtiAirSerialNo)
325  {
326  _logger->log(Logger::LOG_LEV_INFO, "Found Air Temp. Probe.");
327  qtiAirIndex = i;
328  break;
329  }
330  }
331  if(qtiAirIndex == -1)
332  {
333  _logger->log(Logger::LOG_LEV_INFO, "Could not find Air Temp. Probe.");
334  }
335  }
336 
337  if(haveQTIJoe47)
338  {
339  qtiJoe47Index = -1;
340  for(int i=0; i<nprobes; i++)
341  {
342  if(qtiProbes[i].getSerialNumber() == qtiJoe47SerialNo)
343  {
344  _logger->log(Logger::LOG_LEV_INFO, "Found Joe 47 Temp. Probe.");
345  qtiJoe47Index = i;
346  break;
347  }
348  }
349  if(qtiJoe47Index == -1)
350  {
351  _logger->log(Logger::LOG_LEV_INFO, "Could not find Joe47 Temp. Probe.");
352  }
353  }
354 
355  if(haveQTIAmb)
356  {
357  qtiAmbIndex = -1;
358  for(int i=0; i<nprobes; i++)
359  {
360  if(qtiProbes[i].getSerialNumber() == qtiAmbSerialNo)
361  {
362  _logger->log(Logger::LOG_LEV_INFO, "Found Ambient Temp. Probe.");
363  qtiAmbIndex = i;
364  break;
365  }
366  }
367  if(qtiAmbIndex == -1)
368  {
369  _logger->log(Logger::LOG_LEV_INFO, "Could not find Ambient Temp. Probe.");
370  }
371  }
372 
373  std::cout << "Done initing QTI devices" << std::endl;
374 
375  return 0;
376 }
377 
378 jmp_buf env;
379 
380 void sysmonD::qtiTimeout()
381 {
382 
383  ERROR_REPORT("Timed out looking for QTI teperature devices.");
384  longjmp(env,1);
385 
386 }
387 
388 
389 
390 
391 int sysmonD::get_QTITemps()
392 {
393  int rv;
394 
395  rv = set_euid_called();
396 
397  if(haveQTIAir && qtiAirIndex > -1)
398  {
399  airTemp = qtiProbes[qtiAirIndex].getTemperature();
400  if(airTemp < -100 || airTemp > 500) airTemp = 0.0;
401  }
402  else airTemp = -100.;
403  if(airTemp == -1000)
404  {
405  std::cout << "Timed out\n";
406  }
407  if(haveQTIJoe47 && qtiJoe47Index > -1)
408  {
409  joe47Temp = qtiProbes[qtiJoe47Index].getTemperature();
410  if(joe47Temp < -100 || joe47Temp > 500) joe47Temp = 0.0;
411  }
412  else joe47Temp = -100.;
413  if(joe47Temp == -1000)
414  {
415  std::cout << "Timed out\n";
416  }
417 
418  if(haveQTIAmb && qtiAmbIndex > -1)
419  {
420  ambTemp = qtiProbes[qtiAmbIndex].getTemperature();
421  if(ambTemp < -100 || ambTemp > 500) ambTemp = 0.0;
422  std::cout << ambTemp << "\n";
423  }
424  else ambTemp = -100.;
425 
426  rv = set_euid_real();
427  return 0;
428 }
429 
430 
//Body of the main loop.
//NOTE(review): the signature line (embedded line 431) is missing from this listing; the
//cross-reference at the end of the page gives it as "virtual int Run()", defined at sysmonD.cpp:431.
//Returns -1 if the first guarded step fails, otherwise 0 once TimeToDie becomes non-zero.
432 {
433  //Install the main thread handler
 //NOTE(review): the condition for this branch (embedded line 434) is missing from this listing.
435  {
436  ERROR_REPORT("Error installing main thread catcher.");
437  return -1;
438  }
439 
 //setjmp returns 0 when called directly, so setupQTI() runs.  It returns non-zero when
 //qtiTimeout() calls longjmp(env,1); in that case all three probes are disabled.
440  if (setjmp(env))
441  {
442  //std::cout << "we're back \n";
443  haveQTIAir = 0;
444  haveQTIJoe47 = 0;
445  haveQTIAmb = 0;
 //setupQTI() was interrupted before reaching its own set_euid_real() call.
446  set_euid_real();
447  //delete[] qtiProbes;
448  }
449  else setupQTI();
450 
 //Poll every data source once per iteration; the return values of the get_* calls are ignored.
451  while(!TimeToDie)
452  {
453  //std::cout << 1 << std::endl;
454  get_sysstat();
455  //std::cout << 2 << std::endl;
456  get_GPUstat();
457  //std::cout << 3 << std::endl;
458  get_HDDstat();
459  //std::cout << 4 << std::endl;
460  get_QTITemps();
461  //std::cout << 5 << std::endl;
 //NOTE(review): embedded line 462 is missing from this listing.
463 
 //pause_time is a double; sleep() takes whole seconds, so any fractional part is dropped for pause_time >= 1.
464  if(pause_time >= 1) sleep(pause_time);
465  else usleep((int)(pause_time * 1e6));
466  }
467 
468  return 0;
469 }
470 
//Body that copies the most recently gathered statistics and the configured thresholds
//into the structure pointed to by ssb, and writes one line to _tempsLogger.  Always returns 0.
//NOTE(review): the signature line (embedded line 471) is missing from this listing; the
//cross-reference at the end of the page gives it as "virtual int update_statusboard()",
//defined at sysmonD.cpp:471.  Embedded lines 476, 478 and 480 are missing as well, so the
//condition opening the inner block and the declaration of ssb are not visible here.
472 {
473  std::string logs;
474  char logstr[256];
475 
477  {
479 
481 
482  for(int i=0;i<SYS_N_CORES; i++)
483  {
484  ssb->core_temps[i] = core_temps[i];
485  ssb->core_max[i] = core_max[i];
486 
487  //std::cout << ssb->core_temps[i] << "/" << ssb->core_max[i] << "\n";
488  }
489 
490  for(int i=0;i<SYS_N_VCORES; i++)
491  {
492  ssb->core_idle[i] = core_idle[i];
493  }
494 
495  for(int i =0; i < SYS_N_LOGDRV; i++)
496  {
497  ssb->raid_stat[i] = raid_stat[i];
498  }
499 
500 
501  ssb->mem_tot = mem_tot;
502  ssb->mem_used = mem_used;
503  ssb->mem_free = mem_free;
504  ssb->mem_shared = mem_shared;
505  ssb->mem_buff = mem_buff;
506  ssb->mem_cached = mem_cached;
507 
508  ssb->swap_tot = swap_tot;
509  ssb->swap_used = swap_used;
510  ssb->swap_free = swap_free;
511 
512  ssb->dfroot_size = dfroot_size;
513  ssb->dfroot_used = dfroot_used;
514  ssb->dfroot_avail = dfroot_avail;
515 
516  ssb->GPUTemp = GPUTemp;
517  ssb->GPUMemUsage = GPUMemUsage;
518  ssb->HDDTemp_a = HDDTemp_a;
519  ssb->HDDTemp_b = HDDTemp_b;
520 
 //Only the air and Joe 47 temperatures are copied in the visible lines; ambTemp is not.
521  ssb->AirTemp = airTemp;
522  ssb->Joe47Temp = joe47Temp;
523 
 //Build the list of core temperatures for the telemetry line.
 //NOTE(review): logstr is never initialized and is passed to snprintf as both the destination
 //and a "%s" source, which is undefined behaviour (uninitialized read, overlapping buffers).
 //Appending the growing logstr to logs on every pass also repeats earlier values.
 //The likely intent is to format only " %i" into logstr each time.
524  logs = "";
525  for(int i=0;i<SYS_N_CORES; i++)
526  {
527  snprintf(logstr, 256, "%s %i", logstr, (int) core_temps[i]);
528  logs += logstr;
529  //std::cout << ssb->core_temps[i] << "/" << ssb->core_max[i] << "\n";
530  }
531 
 //Publish the thresholds read from the configuration in init_sysmonD().
532  ssb->core_temp_warn = core_temp_warn;
533  ssb->core_temp_limit = core_temp_limit;
534  ssb->hdd_used_warn = hdd_used_warn;
535  ssb->hdd_used_limit = hdd_used_limit;
536  ssb->hdd_temp_warn = hdd_temp_warn;
537  ssb->hdd_temp_limit = hdd_temp_limit;
538  ssb->gpu_temp_warn = gpu_temp_warn;
539  ssb->gpu_temp_limit = gpu_temp_limit;
540  ssb->air_temp_warn = air_temp_warn;
541  ssb->air_temp_limit = air_temp_limit;
542  ssb->joe_temp_warn = joe_temp_warn;
543  ssb->joe_temp_limit = joe_temp_limit;
544 
 //Telemetry record: core temperatures, GPU, HDD a, HDD b, air, Joe 47.
545  _tempsLogger->log(Logger::LOG_LEV_INFO, "%s %i %i %i %0.2f %0.2f", logs.c_str(), (int)GPUTemp, (int)HDDTemp_a, (int)HDDTemp_b, airTemp, joe47Temp);
546 
547  /*std::cout << ssb->mem_tot << "\n";
548  std::cout << ssb->mem_used << "\n";
549  std::cout << ssb->mem_free << "\n";
550  std::cout << ssb->mem_shared << "\n";
551  std::cout << ssb->mem_buff << "\n";
552  std::cout << ssb->mem_cached << "\n";
553  std::cout << ssb->swap_tot << "\n";
554  std::cout << ssb->swap_used << "\n";
555  std::cout << ssb->swap_free << "\n";
556  std::cout << ssb->dfroot_size << "\n";
557  std::cout << ssb->dfroot_used << "\n";
558  std::cout << ssb->dfroot_avail << "\n";*/
559  }
560  return 0;
561 }
562 
563 }//namespace VisAO
Declarations for the sysmonD system monitor.
virtual int Run()
The main loop.
Definition: sysmonD.cpp:431
int readSerialNumber()
The standalone VisAO application, does not interface with the AO Supervisor.
void * statusboard_shmemptr
The pointer to the shared memory block for the statusboard.
virtual int update_statusboard()
Update the status board.
key_t statusboard_shmemkey
The key used to lookup the shared memory.
#define SYS_N_VCORES
Number of virtual processors in the system.
Definition: libvisao.h:48
int TimeToDie
Global set by SIGTERM.
Logger * _tempsLogger
Temperature logger.
Definition: sysmonD.h:117
double pause_time
Time to pause during application main loop.
Definition: VisAOApp_base.h:84
sysmonD(int argc, char **argv)
Command line constructor.
Definition: sysmonD.cpp:25
int create_statusboard(size_t sz)
Creates and attaches to the statusboard shared memory.
virtual int update_statusboard()
Update the status board.
Definition: sysmonD.cpp:471
int set_euid_called()
Changes the user id of the process to euid_called.
virtual int install_sig_mainthread_catcher()
Install the SIG_MAINTHREAD signal catcher.
#define SYS_N_LOGDRV
Number of logical drives in the system.
Definition: libvisao.h:54
The namespace of VisAO software.
#define SYS_N_CORES
Number of physical processors in the system.
Definition: libvisao.h:45
int set_euid_real()
Changes the user id of the process to the real user id.