parallel-vm.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. #!/usr/bin/env python2
  2. #
  3. # Parallel VM test case executor
  4. # Copyright (c) 2014, Jouni Malinen <j@w1.fi>
  5. #
  6. # This software may be distributed under the terms of the BSD license.
  7. # See README for more details.
  8. import curses
  9. import fcntl
  10. import logging
  11. import os
  12. import subprocess
  13. import sys
  14. import time
  15. logger = logging.getLogger()
  16. def get_failed(vm):
  17. failed = []
  18. for i in range(num_servers):
  19. failed += vm[i]['failed']
  20. return failed
  21. def vm_read_stdout(vm, i):
  22. global total_started, total_passed, total_failed, total_skipped
  23. ready = False
  24. try:
  25. out = vm['proc'].stdout.read()
  26. except:
  27. return False
  28. logger.debug("VM[%d] stdout.read[%s]" % (i, out))
  29. pending = vm['pending'] + out
  30. lines = []
  31. while True:
  32. pos = pending.find('\n')
  33. if pos < 0:
  34. break
  35. line = pending[0:pos].rstrip()
  36. pending = pending[(pos + 1):]
  37. logger.debug("VM[%d] stdout full line[%s]" % (i, line))
  38. if line.startswith("READY"):
  39. ready = True
  40. elif line.startswith("PASS"):
  41. ready = True
  42. total_passed += 1
  43. elif line.startswith("FAIL"):
  44. ready = True
  45. total_failed += 1
  46. name = line.split(' ')[1]
  47. logger.debug("VM[%d] test case failed: %s" % (i, name))
  48. vm['failed'].append(name)
  49. elif line.startswith("NOT-FOUND"):
  50. ready = True
  51. total_failed += 1
  52. logger.info("VM[%d] test case not found" % i)
  53. elif line.startswith("SKIP"):
  54. ready = True
  55. total_skipped += 1
  56. elif line.startswith("START"):
  57. total_started += 1
  58. vm['out'] += line + '\n'
  59. lines.append(line)
  60. vm['pending'] = pending
  61. return ready
  62. def show_progress(scr):
  63. global num_servers
  64. global vm
  65. global dir
  66. global timestamp
  67. global tests
  68. global first_run_failures
  69. global total_started, total_passed, total_failed, total_skipped
  70. total_tests = len(tests)
  71. logger.info("Total tests: %d" % total_tests)
  72. scr.leaveok(1)
  73. scr.addstr(0, 0, "Parallel test execution status", curses.A_BOLD)
  74. for i in range(0, num_servers):
  75. scr.addstr(i + 1, 0, "VM %d:" % (i + 1), curses.A_BOLD)
  76. scr.addstr(i + 1, 10, "starting VM")
  77. scr.addstr(num_servers + 1, 0, "Total:", curses.A_BOLD)
  78. scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED=0 PASS=0 FAIL=0 SKIP=0".format(total_tests))
  79. scr.refresh()
  80. completed_first_pass = False
  81. rerun_tests = []
  82. while True:
  83. running = False
  84. first_running = False
  85. updated = False
  86. for i in range(0, num_servers):
  87. if completed_first_pass:
  88. continue
  89. if vm[i]['first_run_done']:
  90. continue
  91. if not vm[i]['proc']:
  92. continue
  93. if vm[i]['proc'].poll() is not None:
  94. vm[i]['proc'] = None
  95. scr.move(i + 1, 10)
  96. scr.clrtoeol()
  97. log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
  98. with open(log, 'r') as f:
  99. if "Kernel panic" in f.read():
  100. scr.addstr("kernel panic")
  101. logger.info("VM[%d] kernel panic" % i)
  102. else:
  103. scr.addstr("unexpected exit")
  104. logger.info("VM[%d] unexpected exit" % i)
  105. updated = True
  106. continue
  107. running = True
  108. first_running = True
  109. try:
  110. err = vm[i]['proc'].stderr.read()
  111. vm[i]['err'] += err
  112. logger.debug("VM[%d] stderr.read[%s]" % (i, err))
  113. except:
  114. pass
  115. if vm_read_stdout(vm[i], i):
  116. scr.move(i + 1, 10)
  117. scr.clrtoeol()
  118. updated = True
  119. if not tests:
  120. vm[i]['first_run_done'] = True
  121. scr.addstr("completed first round")
  122. logger.info("VM[%d] completed first round" % i)
  123. continue
  124. else:
  125. name = tests.pop(0)
  126. vm[i]['proc'].stdin.write(name + '\n')
  127. scr.addstr(name)
  128. logger.debug("VM[%d] start test %s" % (i, name))
  129. if not first_running and not completed_first_pass:
  130. logger.info("First round of testing completed")
  131. if tests:
  132. logger.info("Unexpected test cases remaining from first round: " + str(tests))
  133. raise Exception("Unexpected test cases remaining from first round")
  134. completed_first_pass = True
  135. for name in get_failed(vm):
  136. rerun_tests.append(name)
  137. first_run_failures.append(name)
  138. for i in range(num_servers):
  139. if not completed_first_pass:
  140. continue
  141. if not vm[i]['proc']:
  142. continue
  143. if vm[i]['proc'].poll() is not None:
  144. vm[i]['proc'] = None
  145. scr.move(i + 1, 10)
  146. scr.clrtoeol()
  147. log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
  148. with open(log, 'r') as f:
  149. if "Kernel panic" in f.read():
  150. scr.addstr("kernel panic")
  151. logger.info("VM[%d] kernel panic" % i)
  152. else:
  153. scr.addstr("completed run")
  154. logger.info("VM[%d] completed run" % i)
  155. updated = True
  156. continue
  157. running = True
  158. try:
  159. err = vm[i]['proc'].stderr.read()
  160. vm[i]['err'] += err
  161. logger.debug("VM[%d] stderr.read[%s]" % (i, err))
  162. except:
  163. pass
  164. ready = False
  165. if vm[i]['first_run_done']:
  166. vm[i]['first_run_done'] = False
  167. ready = True
  168. else:
  169. ready = vm_read_stdout(vm[i], i)
  170. if ready:
  171. scr.move(i + 1, 10)
  172. scr.clrtoeol()
  173. updated = True
  174. if not rerun_tests:
  175. vm[i]['proc'].stdin.write('\n')
  176. scr.addstr("shutting down")
  177. logger.info("VM[%d] shutting down" % i)
  178. else:
  179. name = rerun_tests.pop(0)
  180. vm[i]['proc'].stdin.write(name + '\n')
  181. scr.addstr(name + "(*)")
  182. logger.debug("VM[%d] start test %s (*)" % (i, name))
  183. if not running:
  184. break
  185. if updated:
  186. scr.move(num_servers + 1, 10)
  187. scr.clrtoeol()
  188. scr.addstr("{} %".format(int(100.0 * (total_passed + total_failed + total_skipped) / total_tests)))
  189. scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED={} PASS={} FAIL={} SKIP={}".format(total_tests, total_started, total_passed, total_failed, total_skipped))
  190. failed = get_failed(vm)
  191. if len(failed) > 0:
  192. scr.move(num_servers + 2, 0)
  193. scr.clrtoeol()
  194. scr.addstr("Failed test cases: ")
  195. for f in failed:
  196. scr.addstr(f)
  197. scr.addstr(' ')
  198. scr.move(0, 35)
  199. scr.clrtoeol()
  200. if rerun_tests:
  201. scr.addstr("(RETRY FAILED %d)" % len(rerun_tests))
  202. elif first_run_failures:
  203. scr.addstr("(RETRY FAILED)")
  204. scr.refresh()
  205. time.sleep(0.25)
  206. scr.refresh()
  207. time.sleep(0.3)
  208. def main():
  209. global num_servers
  210. global vm
  211. global dir
  212. global timestamp
  213. global tests
  214. global first_run_failures
  215. global total_started, total_passed, total_failed, total_skipped
  216. total_started = 0
  217. total_passed = 0
  218. total_failed = 0
  219. total_skipped = 0
  220. debug_level = logging.INFO
  221. if len(sys.argv) < 2:
  222. sys.exit("Usage: %s <number of VMs> [--debug] [--codecov] [params..]" % sys.argv[0])
  223. num_servers = int(sys.argv[1])
  224. if num_servers < 1:
  225. sys.exit("Too small number of VMs")
  226. timestamp = int(time.time())
  227. idx = 2
  228. if len(sys.argv) > idx and sys.argv[idx] == "--debug":
  229. idx += 1
  230. debug_level = logging.DEBUG
  231. if len(sys.argv) > idx and sys.argv[idx] == "--codecov":
  232. idx += 1
  233. print "Code coverage - build separate binaries"
  234. logdir = "/tmp/hwsim-test-logs/" + str(timestamp)
  235. os.makedirs(logdir)
  236. subprocess.check_call(['./build-codecov.sh', logdir])
  237. codecov_args = ['--codecov_dir', logdir]
  238. codecov = True
  239. else:
  240. codecov_args = []
  241. codecov = False
  242. first_run_failures = []
  243. tests = []
  244. cmd = [ '../run-tests.py', '-L' ] + sys.argv[idx:]
  245. lst = subprocess.Popen(cmd, stdout=subprocess.PIPE)
  246. for l in lst.stdout.readlines():
  247. name = l.split(' ')[0]
  248. tests.append(name)
  249. if len(tests) == 0:
  250. sys.exit("No test cases selected")
  251. if '-f' in sys.argv[idx:]:
  252. extra_args = sys.argv[idx:]
  253. else:
  254. extra_args = [x for x in sys.argv[idx:] if x not in tests]
  255. dir = '/tmp/hwsim-test-logs'
  256. try:
  257. os.mkdir(dir)
  258. except:
  259. pass
  260. if num_servers > 2 and len(tests) > 100:
  261. # Move test cases with long duration to the beginning as an
  262. # optimization to avoid last part of the test execution running a long
  263. # duration test case on a single VM while all other VMs have already
  264. # completed their work.
  265. long = [ "ap_roam_open",
  266. "ap_hs20_fetch_osu_stop",
  267. "ap_roam_wpa2_psk",
  268. "ibss_wpa_none_ccmp",
  269. "nfc_wps_er_handover_pk_hash_mismatch_sta",
  270. "go_neg_peers_force_diff_freq",
  271. "p2p_cli_invite",
  272. "sta_ap_scan_2b",
  273. "ap_pmf_sta_unprot_deauth_burst",
  274. "ap_bss_add_remove_during_ht_scan",
  275. "wext_scan_hidden",
  276. "autoscan_exponential",
  277. "nfc_p2p_client",
  278. "wnm_bss_keep_alive",
  279. "ap_inactivity_disconnect",
  280. "scan_bss_expiration_age",
  281. "autoscan_periodic",
  282. "discovery_group_client",
  283. "concurrent_p2pcli",
  284. "ap_bss_add_remove",
  285. "wpas_ap_wps",
  286. "wext_pmksa_cache",
  287. "ibss_wpa_none",
  288. "ap_ht_40mhz_intolerant_ap",
  289. "ibss_rsn",
  290. "discovery_pd_retries",
  291. "ap_wps_setup_locked_timeout",
  292. "ap_vht160",
  293. "dfs_radar",
  294. "dfs",
  295. "grpform_cred_ready_timeout",
  296. "ap_wps_pbc_timeout" ]
  297. for l in long:
  298. if l in tests:
  299. tests.remove(l)
  300. tests.insert(0, l)
  301. logger.setLevel(debug_level)
  302. log_handler = logging.FileHandler('parallel-vm.log')
  303. log_handler.setLevel(debug_level)
  304. fmt = "%(asctime)s %(levelname)s %(message)s"
  305. log_formatter = logging.Formatter(fmt)
  306. log_handler.setFormatter(log_formatter)
  307. logger.addHandler(log_handler)
  308. vm = {}
  309. for i in range(0, num_servers):
  310. print("\rStarting virtual machine {}/{}".format(i + 1, num_servers)),
  311. logger.info("Starting virtual machine {}/{}".format(i + 1, num_servers))
  312. cmd = ['./vm-run.sh', '--delay', str(i), '--timestamp', str(timestamp),
  313. '--ext', 'srv.%d' % (i + 1),
  314. '-i'] + codecov_args + extra_args
  315. vm[i] = {}
  316. vm[i]['first_run_done'] = False
  317. vm[i]['proc'] = subprocess.Popen(cmd,
  318. stdin=subprocess.PIPE,
  319. stdout=subprocess.PIPE,
  320. stderr=subprocess.PIPE)
  321. vm[i]['out'] = ""
  322. vm[i]['pending'] = ""
  323. vm[i]['err'] = ""
  324. vm[i]['failed'] = []
  325. for stream in [ vm[i]['proc'].stdout, vm[i]['proc'].stderr ]:
  326. fd = stream.fileno()
  327. fl = fcntl.fcntl(fd, fcntl.F_GETFL)
  328. fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
  329. print
  330. curses.wrapper(show_progress)
  331. with open('{}/{}-parallel.log'.format(dir, timestamp), 'w') as f:
  332. for i in range(0, num_servers):
  333. f.write('VM {}\n{}\n{}\n'.format(i, vm[i]['out'], vm[i]['err']))
  334. failed = get_failed(vm)
  335. if first_run_failures:
  336. print "Failed test cases:"
  337. for f in first_run_failures:
  338. print f,
  339. logger.info("Failed: " + f)
  340. print
  341. double_failed = []
  342. for name in failed:
  343. double_failed.append(name)
  344. for test in first_run_failures:
  345. double_failed.remove(test)
  346. if failed and not double_failed:
  347. print "All failed cases passed on retry"
  348. logger.info("All failed cases passed on retry")
  349. elif double_failed:
  350. print "Failed even on retry:"
  351. for f in double_failed:
  352. print f,
  353. logger.info("Failed on retry: " + f)
  354. print
  355. res = "TOTAL={} PASS={} FAIL={} SKIP={}".format(total_started,
  356. total_passed,
  357. total_failed,
  358. total_skipped)
  359. print(res)
  360. logger.info(res)
  361. print "Logs: " + dir + '/' + str(timestamp)
  362. logger.info("Logs: " + dir + '/' + str(timestamp))
  363. for i in range(0, num_servers):
  364. if len(vm[i]['pending']) > 0:
  365. logger.info("Unprocessed stdout from VM[%d]: '%s'" %
  366. (i, vm[i]['pending']))
  367. log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
  368. with open(log, 'r') as f:
  369. if "Kernel panic" in f.read():
  370. print "Kernel panic in " + log
  371. logger.info("Kernel panic in " + log)
  372. if codecov:
  373. print "Code coverage - preparing report"
  374. for i in range(num_servers):
  375. subprocess.check_call(['./process-codecov.sh',
  376. logdir + ".srv.%d" % (i + 1),
  377. str(i)])
  378. subprocess.check_call(['./combine-codecov.sh', logdir])
  379. print "file://%s/index.html" % logdir
  380. logger.info("Code coverage report: file://%s/index.html" % logdir)
  381. if double_failed:
  382. logger.info("Test run complete - failures found")
  383. sys.exit(2)
  384. if failed:
  385. logger.info("Test run complete - failures found on first run; passed on retry")
  386. sys.exit(1)
  387. logger.info("Test run complete - no failures")
  388. sys.exit(0)
  389. if __name__ == "__main__":
  390. main()