parallel-vm.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. #!/usr/bin/env python2
  2. #
  3. # Parallel VM test case executor
  4. # Copyright (c) 2014-2015, Jouni Malinen <j@w1.fi>
  5. #
  6. # This software may be distributed under the terms of the BSD license.
  7. # See README for more details.
  8. import curses
  9. import fcntl
  10. import logging
  11. import os
  12. import subprocess
  13. import sys
  14. import time
  15. logger = logging.getLogger()
  16. def get_failed(vm):
  17. failed = []
  18. for i in range(num_servers):
  19. failed += vm[i]['failed']
  20. return failed
  21. def vm_read_stdout(vm, i):
  22. global total_started, total_passed, total_failed, total_skipped
  23. global rerun_failures
  24. ready = False
  25. try:
  26. out = vm['proc'].stdout.read()
  27. except:
  28. return False
  29. logger.debug("VM[%d] stdout.read[%s]" % (i, out))
  30. pending = vm['pending'] + out
  31. lines = []
  32. while True:
  33. pos = pending.find('\n')
  34. if pos < 0:
  35. break
  36. line = pending[0:pos].rstrip()
  37. pending = pending[(pos + 1):]
  38. logger.debug("VM[%d] stdout full line[%s]" % (i, line))
  39. if line.startswith("READY"):
  40. ready = True
  41. elif line.startswith("PASS"):
  42. ready = True
  43. total_passed += 1
  44. elif line.startswith("FAIL"):
  45. ready = True
  46. total_failed += 1
  47. name = line.split(' ')[1]
  48. logger.debug("VM[%d] test case failed: %s" % (i, name))
  49. vm['failed'].append(name)
  50. elif line.startswith("NOT-FOUND"):
  51. ready = True
  52. total_failed += 1
  53. logger.info("VM[%d] test case not found" % i)
  54. elif line.startswith("SKIP"):
  55. ready = True
  56. total_skipped += 1
  57. elif line.startswith("START"):
  58. total_started += 1
  59. vm['out'] += line + '\n'
  60. lines.append(line)
  61. vm['pending'] = pending
  62. return ready
  63. def show_progress(scr):
  64. global num_servers
  65. global vm
  66. global dir
  67. global timestamp
  68. global tests
  69. global first_run_failures
  70. global total_started, total_passed, total_failed, total_skipped
  71. total_tests = len(tests)
  72. logger.info("Total tests: %d" % total_tests)
  73. scr.leaveok(1)
  74. scr.addstr(0, 0, "Parallel test execution status", curses.A_BOLD)
  75. for i in range(0, num_servers):
  76. scr.addstr(i + 1, 0, "VM %d:" % (i + 1), curses.A_BOLD)
  77. scr.addstr(i + 1, 10, "starting VM")
  78. scr.addstr(num_servers + 1, 0, "Total:", curses.A_BOLD)
  79. scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED=0 PASS=0 FAIL=0 SKIP=0".format(total_tests))
  80. scr.refresh()
  81. completed_first_pass = False
  82. rerun_tests = []
  83. while True:
  84. running = False
  85. first_running = False
  86. updated = False
  87. for i in range(0, num_servers):
  88. if completed_first_pass:
  89. continue
  90. if vm[i]['first_run_done']:
  91. continue
  92. if not vm[i]['proc']:
  93. continue
  94. if vm[i]['proc'].poll() is not None:
  95. vm[i]['proc'] = None
  96. scr.move(i + 1, 10)
  97. scr.clrtoeol()
  98. log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
  99. with open(log, 'r') as f:
  100. if "Kernel panic" in f.read():
  101. scr.addstr("kernel panic")
  102. logger.info("VM[%d] kernel panic" % i)
  103. else:
  104. scr.addstr("unexpected exit")
  105. logger.info("VM[%d] unexpected exit" % i)
  106. updated = True
  107. continue
  108. running = True
  109. first_running = True
  110. try:
  111. err = vm[i]['proc'].stderr.read()
  112. vm[i]['err'] += err
  113. logger.debug("VM[%d] stderr.read[%s]" % (i, err))
  114. except:
  115. pass
  116. if vm_read_stdout(vm[i], i):
  117. scr.move(i + 1, 10)
  118. scr.clrtoeol()
  119. updated = True
  120. if not tests:
  121. vm[i]['first_run_done'] = True
  122. scr.addstr("completed first round")
  123. logger.info("VM[%d] completed first round" % i)
  124. continue
  125. else:
  126. name = tests.pop(0)
  127. vm[i]['proc'].stdin.write(name + '\n')
  128. scr.addstr(name)
  129. logger.debug("VM[%d] start test %s" % (i, name))
  130. if not first_running and not completed_first_pass:
  131. logger.info("First round of testing completed")
  132. if tests:
  133. logger.info("Unexpected test cases remaining from first round: " + str(tests))
  134. raise Exception("Unexpected test cases remaining from first round")
  135. completed_first_pass = True
  136. for name in get_failed(vm):
  137. if rerun_failures:
  138. rerun_tests.append(name)
  139. first_run_failures.append(name)
  140. for i in range(num_servers):
  141. if not completed_first_pass:
  142. continue
  143. if not vm[i]['proc']:
  144. continue
  145. if vm[i]['proc'].poll() is not None:
  146. vm[i]['proc'] = None
  147. scr.move(i + 1, 10)
  148. scr.clrtoeol()
  149. log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
  150. with open(log, 'r') as f:
  151. if "Kernel panic" in f.read():
  152. scr.addstr("kernel panic")
  153. logger.info("VM[%d] kernel panic" % i)
  154. else:
  155. scr.addstr("completed run")
  156. logger.info("VM[%d] completed run" % i)
  157. updated = True
  158. continue
  159. running = True
  160. try:
  161. err = vm[i]['proc'].stderr.read()
  162. vm[i]['err'] += err
  163. logger.debug("VM[%d] stderr.read[%s]" % (i, err))
  164. except:
  165. pass
  166. ready = False
  167. if vm[i]['first_run_done']:
  168. vm[i]['first_run_done'] = False
  169. ready = True
  170. else:
  171. ready = vm_read_stdout(vm[i], i)
  172. if ready:
  173. scr.move(i + 1, 10)
  174. scr.clrtoeol()
  175. updated = True
  176. if not rerun_tests:
  177. vm[i]['proc'].stdin.write('\n')
  178. scr.addstr("shutting down")
  179. logger.info("VM[%d] shutting down" % i)
  180. else:
  181. name = rerun_tests.pop(0)
  182. vm[i]['proc'].stdin.write(name + '\n')
  183. scr.addstr(name + "(*)")
  184. logger.debug("VM[%d] start test %s (*)" % (i, name))
  185. if not running:
  186. break
  187. if updated:
  188. scr.move(num_servers + 1, 10)
  189. scr.clrtoeol()
  190. scr.addstr("{} %".format(int(100.0 * (total_passed + total_failed + total_skipped) / total_tests)))
  191. scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED={} PASS={} FAIL={} SKIP={}".format(total_tests, total_started, total_passed, total_failed, total_skipped))
  192. failed = get_failed(vm)
  193. if len(failed) > 0:
  194. scr.move(num_servers + 2, 0)
  195. scr.clrtoeol()
  196. scr.addstr("Failed test cases: ")
  197. count = 0
  198. for f in failed:
  199. count += 1
  200. if count > 30:
  201. scr.addstr('...')
  202. scr.clrtoeol()
  203. break
  204. scr.addstr(f)
  205. scr.addstr(' ')
  206. scr.move(0, 35)
  207. scr.clrtoeol()
  208. if rerun_tests:
  209. scr.addstr("(RETRY FAILED %d)" % len(rerun_tests))
  210. elif rerun_failures:
  211. pass
  212. elif first_run_failures:
  213. scr.addstr("(RETRY FAILED)")
  214. scr.refresh()
  215. time.sleep(0.25)
  216. scr.refresh()
  217. time.sleep(0.3)
  218. def main():
  219. import argparse
  220. import os
  221. global num_servers
  222. global vm
  223. global dir
  224. global timestamp
  225. global tests
  226. global first_run_failures
  227. global total_started, total_passed, total_failed, total_skipped
  228. global rerun_failures
  229. total_started = 0
  230. total_passed = 0
  231. total_failed = 0
  232. total_skipped = 0
  233. debug_level = logging.INFO
  234. rerun_failures = True
  235. timestamp = int(time.time())
  236. scriptsdir = os.path.dirname(os.path.realpath(sys.argv[0]))
  237. p = argparse.ArgumentParser(description='run multiple testing VMs in parallel')
  238. p.add_argument('num_servers', metavar='number of VMs', type=int, choices=range(1, 100),
  239. help="number of VMs to start")
  240. p.add_argument('-f', dest='testmodules', metavar='<test module>',
  241. help='execute only tests from these test modules',
  242. type=str, nargs='+')
  243. p.add_argument('-1', dest='no_retry', action='store_const', const=True, default=False,
  244. help="don't retry failed tests automatically")
  245. p.add_argument('--debug', dest='debug', action='store_const', const=True, default=False,
  246. help="enable debug logging")
  247. p.add_argument('--codecov', dest='codecov', action='store_const', const=True, default=False,
  248. help="enable code coverage collection")
  249. p.add_argument('--shuffle-tests', dest='shuffle', action='store_const', const=True, default=False,
  250. help="shuffle test cases to randomize order")
  251. p.add_argument('--long', dest='long', action='store_const', const=True,
  252. default=False,
  253. help="include long-duration test cases")
  254. p.add_argument('params', nargs='*')
  255. args = p.parse_args()
  256. num_servers = args.num_servers
  257. rerun_failures = not args.no_retry
  258. if args.debug:
  259. debug_level = logging.DEBUG
  260. extra_args = []
  261. if args.long:
  262. extra_args += [ '--long' ]
  263. if args.codecov:
  264. print "Code coverage - build separate binaries"
  265. logdir = "/tmp/hwsim-test-logs/" + str(timestamp)
  266. os.makedirs(logdir)
  267. subprocess.check_call([os.path.join(scriptsdir, 'build-codecov.sh'),
  268. logdir])
  269. codecov_args = ['--codecov_dir', logdir]
  270. codecov = True
  271. else:
  272. codecov_args = []
  273. codecov = False
  274. first_run_failures = []
  275. tests = []
  276. cmd = [ os.path.join(os.path.dirname(scriptsdir), 'run-tests.py'), '-L' ]
  277. if args.testmodules:
  278. cmd += [ "-f" ]
  279. cmd += args.testmodules
  280. lst = subprocess.Popen(cmd, stdout=subprocess.PIPE)
  281. for l in lst.stdout.readlines():
  282. name = l.split(' ')[0]
  283. tests.append(name)
  284. if len(tests) == 0:
  285. sys.exit("No test cases selected")
  286. dir = '/tmp/hwsim-test-logs'
  287. try:
  288. os.mkdir(dir)
  289. except:
  290. pass
  291. if args.shuffle:
  292. from random import shuffle
  293. shuffle(tests)
  294. elif num_servers > 2 and len(tests) > 100:
  295. # Move test cases with long duration to the beginning as an
  296. # optimization to avoid last part of the test execution running a long
  297. # duration test case on a single VM while all other VMs have already
  298. # completed their work.
  299. long = [ "ap_roam_open",
  300. "wpas_mesh_password_mismatch_retry",
  301. "wpas_mesh_password_mismatch",
  302. "hostapd_oom_wpa2_psk_connect",
  303. "ap_hs20_fetch_osu_stop",
  304. "ap_roam_wpa2_psk",
  305. "ibss_wpa_none_ccmp",
  306. "nfc_wps_er_handover_pk_hash_mismatch_sta",
  307. "go_neg_peers_force_diff_freq",
  308. "p2p_cli_invite",
  309. "sta_ap_scan_2b",
  310. "ap_pmf_sta_unprot_deauth_burst",
  311. "ap_bss_add_remove_during_ht_scan",
  312. "wext_scan_hidden",
  313. "autoscan_exponential",
  314. "nfc_p2p_client",
  315. "wnm_bss_keep_alive",
  316. "ap_inactivity_disconnect",
  317. "scan_bss_expiration_age",
  318. "autoscan_periodic",
  319. "discovery_group_client",
  320. "concurrent_p2pcli",
  321. "ap_bss_add_remove",
  322. "wpas_ap_wps",
  323. "wext_pmksa_cache",
  324. "ibss_wpa_none",
  325. "ap_ht_40mhz_intolerant_ap",
  326. "ibss_rsn",
  327. "discovery_pd_retries",
  328. "ap_wps_setup_locked_timeout",
  329. "ap_vht160",
  330. "dfs_radar",
  331. "dfs",
  332. "grpform_cred_ready_timeout",
  333. "hostapd_oom_wpa2_eap_connect",
  334. "wpas_ap_dfs",
  335. "ap_wps_pbc_timeout" ]
  336. for l in long:
  337. if l in tests:
  338. tests.remove(l)
  339. tests.insert(0, l)
  340. logger.setLevel(debug_level)
  341. log_handler = logging.FileHandler('parallel-vm.log')
  342. log_handler.setLevel(debug_level)
  343. fmt = "%(asctime)s %(levelname)s %(message)s"
  344. log_formatter = logging.Formatter(fmt)
  345. log_handler.setFormatter(log_formatter)
  346. logger.addHandler(log_handler)
  347. vm = {}
  348. for i in range(0, num_servers):
  349. print("\rStarting virtual machine {}/{}".format(i + 1, num_servers)),
  350. logger.info("Starting virtual machine {}/{}".format(i + 1, num_servers))
  351. cmd = [os.path.join(scriptsdir, 'vm-run.sh'), '--delay', str(i),
  352. '--timestamp', str(timestamp),
  353. '--ext', 'srv.%d' % (i + 1),
  354. '-i'] + codecov_args + extra_args
  355. vm[i] = {}
  356. vm[i]['first_run_done'] = False
  357. vm[i]['proc'] = subprocess.Popen(cmd,
  358. stdin=subprocess.PIPE,
  359. stdout=subprocess.PIPE,
  360. stderr=subprocess.PIPE)
  361. vm[i]['out'] = ""
  362. vm[i]['pending'] = ""
  363. vm[i]['err'] = ""
  364. vm[i]['failed'] = []
  365. for stream in [ vm[i]['proc'].stdout, vm[i]['proc'].stderr ]:
  366. fd = stream.fileno()
  367. fl = fcntl.fcntl(fd, fcntl.F_GETFL)
  368. fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
  369. print
  370. curses.wrapper(show_progress)
  371. with open('{}/{}-parallel.log'.format(dir, timestamp), 'w') as f:
  372. for i in range(0, num_servers):
  373. f.write('VM {}\n{}\n{}\n'.format(i, vm[i]['out'], vm[i]['err']))
  374. failed = get_failed(vm)
  375. if first_run_failures:
  376. print "Failed test cases:"
  377. for f in first_run_failures:
  378. print f,
  379. logger.info("Failed: " + f)
  380. print
  381. double_failed = []
  382. for name in failed:
  383. double_failed.append(name)
  384. for test in first_run_failures:
  385. double_failed.remove(test)
  386. if not rerun_failures:
  387. pass
  388. elif failed and not double_failed:
  389. print "All failed cases passed on retry"
  390. logger.info("All failed cases passed on retry")
  391. elif double_failed:
  392. print "Failed even on retry:"
  393. for f in double_failed:
  394. print f,
  395. logger.info("Failed on retry: " + f)
  396. print
  397. res = "TOTAL={} PASS={} FAIL={} SKIP={}".format(total_started,
  398. total_passed,
  399. total_failed,
  400. total_skipped)
  401. print(res)
  402. logger.info(res)
  403. print "Logs: " + dir + '/' + str(timestamp)
  404. logger.info("Logs: " + dir + '/' + str(timestamp))
  405. for i in range(0, num_servers):
  406. if len(vm[i]['pending']) > 0:
  407. logger.info("Unprocessed stdout from VM[%d]: '%s'" %
  408. (i, vm[i]['pending']))
  409. log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
  410. with open(log, 'r') as f:
  411. if "Kernel panic" in f.read():
  412. print "Kernel panic in " + log
  413. logger.info("Kernel panic in " + log)
  414. if codecov:
  415. print "Code coverage - preparing report"
  416. for i in range(num_servers):
  417. subprocess.check_call([os.path.join(scriptsdir,
  418. 'process-codecov.sh'),
  419. logdir + ".srv.%d" % (i + 1),
  420. str(i)])
  421. subprocess.check_call([os.path.join(scriptsdir, 'combine-codecov.sh'),
  422. logdir])
  423. print "file://%s/index.html" % logdir
  424. logger.info("Code coverage report: file://%s/index.html" % logdir)
  425. if double_failed or (failed and not rerun_failures):
  426. logger.info("Test run complete - failures found")
  427. sys.exit(2)
  428. if failed:
  429. logger.info("Test run complete - failures found on first run; passed on retry")
  430. sys.exit(1)
  431. logger.info("Test run complete - no failures")
  432. sys.exit(0)
  433. if __name__ == "__main__":
  434. main()