PennMUSH Community

Changeset 1224

Show
Ignore:
Timestamp:
03/07/08 01:41:17 (7 months ago)
Author:
shawnw
Message:

Detect a repeatedly crashing info_slave and work around it.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • 1.8.3/branches/devel/CHANGES.183

    r1219 r1224  
    4949 * Fixes from 1.8.2p9 
    5050 * Compilation fixes on various platforms. [SW] 
     51 * A repeatedly crashing info_slave won't block login attempts. 
    5152 
    5253Version 1.8.3 patchlevel 6                      Jan 01, 2008 
  • 1.8.3/branches/devel/hdrs/lookup.h

    r905 r1224  
    4646extern int info_slave; 
    4747extern time_t info_queue_time; 
     48extern bool info_slave_halted; 
    4849 
    4950enum is_state { INFO_SLAVE_DOWN, INFO_SLAVE_READY, INFO_SLAVE_PENDING }; 
     
    5253 
    5354void init_info_slave(void); 
    54 void make_info_slave(void); 
    5555void query_info_slave(int fd); 
    5656void update_pending_info_slaves(void); 
  • 1.8.3/branches/devel/src/bsd.c

    r1213 r1224  
    311311static void shovechars(Port_t port, Port_t sslport); 
    312312static int test_connection(int newsock); 
    313 #ifndef INFO_SLAVE 
    314 static DESC *new_connection(int oldsock, int *result, int use_ssl); 
    315 #endif 
     313static DESC *new_connection(int oldsock, int *result, bool use_ssl); 
    316314 
    317315static void clearstrings(DESC *d); 
     
    860858 
    861859static void 
     860setup_desc(int sock, bool use_ssl) 
     861{ 
     862  DESC *newd; 
     863  int result; 
     864 
     865  if (!(newd = new_connection(sock, &result, use_ssl))) { 
     866    if (test_connection(result) < 0) 
     867      return; 
     868  } else { 
     869    ndescriptors++; 
     870    if (newd->descriptor >= maxd) 
     871      maxd = newd->descriptor + 1; 
     872  } 
     873} 
     874 
     875static void 
    862876shovechars(Port_t port, Port_t sslport __attribute__ ((__unused__))) 
    863877{ 
     
    872886  int queue_timeout; 
    873887  DESC *d, *dnext; 
    874 #ifndef INFO_SLAVE 
    875   DESC *newd; 
    876   int result; 
    877 #endif 
    878888  int avail_descriptors; 
    879889#ifdef INFO_SLAVE 
     
    10601070 
    10611071      if (FD_ISSET(sock, &input_set)) { 
    1062         addr_len = sizeof(addr); 
    1063         newsock = accept(sock, (struct sockaddr *) &addr, &addr_len); 
    1064         if (newsock < 0) { 
    1065           if (test_connection(newsock) < 0) 
    1066             continue;           /* this should _not_ be return. */ 
    1067         } 
    1068         ndescriptors++; 
    1069         query_info_slave(newsock); 
    1070         if (newsock >= maxd) 
    1071           maxd = newsock + 1; 
     1072    if (!info_slave_halted) { 
     1073      addr_len = sizeof(addr); 
     1074      newsock = accept(sock, (struct sockaddr *) &addr, &addr_len); 
     1075      if (newsock < 0) { 
     1076        if (test_connection(newsock) < 0) 
     1077          continue;           /* this should _not_ be return. */ 
     1078      } 
     1079      ndescriptors++; 
     1080      query_info_slave(newsock); 
     1081      if (newsock >= maxd) 
     1082        maxd = newsock + 1; 
     1083    } else  
     1084      setup_desc(sock, false); 
    10721085      } 
    10731086#ifdef HAS_OPENSSL 
    10741087      if (sslsock && FD_ISSET(sslsock, &input_set)) { 
    1075         addr_len = sizeof(addr); 
    1076         newsock = accept(sslsock, (struct sockaddr *) &addr, &addr_len); 
    1077         if (newsock < 0) { 
    1078           if (test_connection(newsock) < 0) 
    1079             continue;           /* this should _not_ be return. */ 
    1080         } 
    1081         ndescriptors++; 
    1082         query_info_slave(newsock); 
    1083         if (newsock >= maxd) 
    1084           maxd = newsock + 1; 
     1088    if (!info_slave_halted) { 
     1089      addr_len = sizeof(addr); 
     1090      newsock = accept(sslsock, (struct sockaddr *) &addr, &addr_len); 
     1091      if (newsock < 0) { 
     1092        if (test_connection(newsock) < 0) 
     1093          continue;           /* this should _not_ be return. */ 
     1094      } 
     1095      ndescriptors++; 
     1096      query_info_slave(newsock); 
     1097      if (newsock >= maxd) 
     1098        maxd = newsock + 1; 
     1099    } else 
     1100      setup_desc(sslsock, true); 
    10851101      } 
    10861102#endif 
    10871103#else                           /* INFO_SLAVE */ 
    1088       if (FD_ISSET(sock, &input_set)) { 
    1089         if (!(newd = new_connection(sock, &result, 0))) { 
    1090           if (test_connection(result) < 0) 
    1091             continue;           /* this should _not_ be return. */ 
    1092         } else { 
    1093           ndescriptors++; 
    1094           if (newd->descriptor >= maxd) 
    1095             maxd = newd->descriptor + 1; 
    1096         } 
    1097       } 
     1104      if (FD_ISSET(sock, &input_set))  
     1105    setup_desc(sock, false); 
    10981106#ifdef HAS_OPENSSL 
    1099       if (sslsock && FD_ISSET(sslsock, &input_set)) { 
    1100         if (!(newd = new_connection(sslsock, &result, 1))) { 
    1101           if (test_connection(result) < 0) 
    1102             continue;           /* this should _not_ be return. */ 
    1103         } else { 
    1104           ndescriptors++; 
    1105           if (newd->descriptor >= maxd) 
    1106             maxd = newd->descriptor + 1; 
    1107         } 
    1108       } 
     1107      if (sslsock && FD_ISSET(sslsock, &input_set))  
     1108    setup_desc(sslsock, true); 
    11091109#endif 
    11101110#endif 
     
    11481148} 
    11491149 
    1150  
    1151 #ifndef INFO_SLAVE 
    11521150static DESC * 
    1153 new_connection(int oldsock, int *result, int use_ssl) 
     1151new_connection(int oldsock, int *result, bool use_ssl) 
    11541152{ 
    11551153  int newsock; 
     
    12081206  return initializesock(newsock, tbuf1, tbuf2, use_ssl); 
    12091207} 
    1210 #endif 
    12111208 
    12121209static void 
  • 1.8.3/branches/devel/src/info_master.c

    r1223 r1224  
    5353#endif 
    5454 
     55static bool make_info_slave(void); 
     56 
    5557static fd_set info_pending; /**< Keep track of fds pending a slave lookup */ 
    5658static int pending_max = 0; 
     
    6062time_t info_queue_time; /**< Time of last write to slave */ 
    6163 
     64static int startup_attempts = 0; /**< How many times has info_slave been started? */ 
     65static time_t startup_window; 
     66#define MAX_ATTEMPTS 5 /**< Error out after this many startup attempts in 60 seconds */ 
     67 
     68bool info_slave_halted = false; 
    6269 
    6370 /* From bsd.c */ 
     
    7279  int newsock; 
    7380 
    74   now = time(NULL); 
     81  time(&now); 
    7582 
    7683  if (info_slave_state == INFO_SLAVE_PENDING && now > info_queue_time + 30) { 
     
    9097} 
    9198 
    92 void 
     99bool 
    93100make_info_slave(void) 
    94101{ 
     
    102109    info_slave_state = INFO_SLAVE_DOWN; 
    103110  } 
     111 
     112  if (startup_attempts == 0) 
     113    time(&startup_window); 
     114 
     115  startup_attempts += 1; 
     116 
     117  if (startup_attempts > MAX_ATTEMPTS) { 
     118    time_t now; 
     119 
     120    time(&now); 
     121    if (difftime(now, startup_window) <= 60.0) { 
     122      /* Too many failed attempts to start info_slave in 1 minute */ 
     123      do_rawlog(LT_ERR, T("Disabling info_slave due to too many errors.")); 
     124      info_slave_halted = true; 
     125      return false; 
     126    } else { 
     127      /* Reset counter */ 
     128      startup_window = now; 
     129      startup_attempts = 0; 
     130    } 
     131  } 
     132 
     133 
    104134#ifndef AF_LOCAL 
    105135  /* Use Posix.1g names. */ 
     
    110140  if (socketpair(AF_LOCAL, SOCK_DGRAM, 0, socks) < 0) { 
    111141    penn_perror("creating slave datagram socketpair"); 
    112     return;
     142    return false;
    113143  } 
    114144  if (socks[0] >= maxd) 
     
    125155    closesocket(socks[1]); 
    126156#endif 
    127     return;
     157    return false;
    128158  } else if (child > 0) { 
    129159    info_slave_state = INFO_SLAVE_READY; 
     
    180210    if (FD_ISSET(n, &info_pending)) 
    181211      query_info_slave(n); 
     212   
     213  return true; 
    182214} 
    183215 
     
    197229 
    198230  if (info_slave_state == INFO_SLAVE_DOWN) { 
    199     make_info_slave(); 
    200     return; 
    201   } 
     231    if (!make_info_slave()) { 
     232      FD_CLR(fd, &info_pending); 
     233      closesocket(fd); /* Just drop the connection if the slave gets halted. 
     234              A subsequent reconnect will work. */ 
     235    } 
     236    return; 
     237  } 
     238 
    202239 
    203240  memset(&req, 0, sizeof req); 
     
    336373  WAIT_TYPE my_stat; 
    337374  pid_t pid; 
    338   struct timeval pad; 
    339375 
    340376  if (info_slave_state != INFO_SLAVE_DOWN) { 
     
    348384      /* Have to wait long enough for the info_slave to actually 
    349385         die. This will hopefully be enough time. */ 
    350       pad.tv_sec = 0; 
    351       pad.tv_usec = 100; 
    352       select(0, NULL, NULL, NULL, &pad); 
     386      usleep(100); 
    353387 
    354388      pid = mush_wait(info_slave_pid, &my_stat, WNOHANG); 
  • 1.8.3/branches/devel/src/info_slave.c

    r1223 r1224  
    204204      if (getnameinfo(&req.remote.addr, req.rlen, resp.hostname, 
    205205                      sizeof resp.hostname, NULL, 0, NI_NUMERICSERV) != 0) 
    206         strcpy(resp.hostname, "An error occured"); 
     206        strcpy(resp.hostname, resp.ipaddr); 
    207207    } else 
    208208      strcpy(resp.hostname, resp.ipaddr);