#!/usr/bin/env python2
#
# Parallel VM test case executor
# Copyright (c) 2014, Jouni Malinen <j@w1.fi>
#
# This software may be distributed under the terms of the BSD license.
# See README for more details.

import curses
import fcntl
import logging
import os
import subprocess
import sys
import time

logger = logging.getLogger()
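
# Collect results by scanning the buffered stdout of each VM for the
# "START", "PASS", "FAIL", and "SKIP" lines it has reported so far.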
def get_results():
    global vm
    started = []
    passed = []
    failed = []
    skipped = []
    for i in range(0, num_servers):
        lines = vm[i]['out'].splitlines()
        started += [ l for l in lines if l.startswith('START ') ]
        passed += [ l for l in lines if l.startswith('PASS ') ]
        failed += [ l for l in lines if l.startswith('FAIL ') ]
        skipped += [ l for l in lines if l.startswith('SKIP ') ]
    return (started, passed, failed, skipped)
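
# A line starting with READY/PASS/FAIL/SKIP means the VM has finished its
# previous step and is ready to be given the next test case.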
def vm_ready_str(s):
    return s.startswith("READY") or s.startswith("PASS") or \
           s.startswith("FAIL") or s.startswith("SKIP")
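
# Drain the non-blocking stdout pipe of a VM: keep any partial line in
# vm['pending'], append complete lines to vm['out'], and return True if a
# READY/PASS/FAIL/SKIP line was seen (i.e., the VM can accept a new test).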
def vm_read_stdout(vm, i):
    ready = False
    try:
        out = vm['proc'].stdout.read()
    except:
        return False
    logger.debug("VM[%d] stdout.read[%s]" % (i, out))
    pending = vm['pending'] + out
    lines = []
    while True:
        pos = pending.find('\n')
        if pos < 0:
            break
        line = pending[0:pos].rstrip()
        pending = pending[(pos + 1):]
        logger.debug("VM[%d] stdout full line[%s]" % (i, line))
        if vm_ready_str(line):
            ready = True
        vm['out'] += line + '\n'
        lines.append(line)
    vm['pending'] = pending
    return ready
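
# curses UI loop: dispatch test cases to the VMs, show per-VM and overall
# progress, and after the first full pass re-run any failed test cases once.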
def show_progress(scr):
    global num_servers
    global vm
    global dir
    global timestamp
    global tests
    global first_run_failures

    total_tests = len(tests)
    logger.info("Total tests: %d" % total_tests)

    scr.leaveok(1)
    scr.addstr(0, 0, "Parallel test execution status", curses.A_BOLD)
    for i in range(0, num_servers):
        scr.addstr(i + 1, 0, "VM %d:" % (i + 1), curses.A_BOLD)
        scr.addstr(i + 1, 10, "starting VM")
    scr.addstr(num_servers + 1, 0, "Total:", curses.A_BOLD)
    scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED=0 PASS=0 FAIL=0 SKIP=0".format(total_tests))
    scr.refresh()

    completed_first_pass = False
    rerun_tests = []

    while True:
        running = False
        first_running = False
        updated = False
        for i in range(0, num_servers):
            if completed_first_pass:
                continue
            if vm[i]['first_run_done']:
                continue
            if not vm[i]['proc']:
                continue
            if vm[i]['proc'].poll() is not None:
                vm[i]['proc'] = None
                scr.move(i + 1, 10)
                scr.clrtoeol()
                log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
                with open(log, 'r') as f:
                    if "Kernel panic" in f.read():
                        scr.addstr("kernel panic")
                        logger.info("VM[%d] kernel panic" % i)
                    else:
                        scr.addstr("unexpected exit")
                        logger.info("VM[%d] unexpected exit" % i)
                updated = True
                continue
            running = True
            first_running = True
            try:
                err = vm[i]['proc'].stderr.read()
                vm[i]['err'] += err
                logger.debug("VM[%d] stderr.read[%s]" % (i, err))
            except:
                pass
            if vm_read_stdout(vm[i], i):
                scr.move(i + 1, 10)
                scr.clrtoeol()
                updated = True
                if not tests:
                    vm[i]['first_run_done'] = True
                    scr.addstr("completed first round")
                    logger.info("VM[%d] completed first round" % i)
                    continue
                else:
                    name = tests.pop(0)
                    vm[i]['proc'].stdin.write(name + '\n')
                    scr.addstr(name)
                    logger.debug("VM[%d] start test %s" % (i, name))

        if not first_running and not completed_first_pass:
            logger.info("First round of testing completed")
            if tests:
                logger.info("Unexpected test cases remaining from first round: " + str(tests))
                raise Exception("Unexpected test cases remaining from first round")
            completed_first_pass = True
            (started, passed, failed, skipped) = get_results()
            for f in failed:
                name = f.split(' ')[1]
                rerun_tests.append(name)
                first_run_failures.append(name)

        for i in range(num_servers):
            if not completed_first_pass:
                continue
            if not vm[i]['proc']:
                continue
            if vm[i]['proc'].poll() is not None:
                vm[i]['proc'] = None
                scr.move(i + 1, 10)
                scr.clrtoeol()
                log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
                with open(log, 'r') as f:
                    if "Kernel panic" in f.read():
                        scr.addstr("kernel panic")
                        logger.info("VM[%d] kernel panic" % i)
                    else:
                        scr.addstr("completed run")
                        logger.info("VM[%d] completed run" % i)
                updated = True
                continue
            running = True
            try:
                err = vm[i]['proc'].stderr.read()
                vm[i]['err'] += err
                logger.debug("VM[%d] stderr.read[%s]" % (i, err))
            except:
                pass
            ready = False
            if vm[i]['first_run_done']:
                vm[i]['first_run_done'] = False
                ready = True
            else:
                ready = vm_read_stdout(vm[i], i)
            if ready:
                scr.move(i + 1, 10)
                scr.clrtoeol()
                updated = True
                if not rerun_tests:
                    vm[i]['proc'].stdin.write('\n')
                    scr.addstr("shutting down")
                    logger.info("VM[%d] shutting down" % i)
                else:
                    name = rerun_tests.pop(0)
                    vm[i]['proc'].stdin.write(name + '\n')
                    scr.addstr(name + "(*)")
                    logger.debug("VM[%d] start test %s (*)" % (i, name))

        if not running:
            break

        if updated:
            (started, passed, failed, skipped) = get_results()
            scr.move(num_servers + 1, 10)
            scr.clrtoeol()
            scr.addstr("{} %".format(int(100.0 * (len(passed) + len(failed) + len(skipped)) / total_tests)))
            scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED={} PASS={} FAIL={} SKIP={}".format(total_tests, len(started), len(passed), len(failed), len(skipped)))
            if len(failed) > 0:
                scr.move(num_servers + 2, 0)
                scr.clrtoeol()
                scr.addstr("Failed test cases: ")
                for f in failed:
                    scr.addstr(f.split(' ')[1])
                    scr.addstr(' ')
            scr.move(0, 35)
            scr.clrtoeol()
            if rerun_tests:
                scr.addstr("(RETRY FAILED %d)" % len(rerun_tests))
            elif first_run_failures:
                scr.addstr("(RETRY FAILED)")
            scr.refresh()

        time.sleep(0.25)

    scr.refresh()
    time.sleep(0.3)
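
# Parse the command line, start the requested number of VMs with vm-run.sh,
# run the selected test cases through the curses progress view, and write the
# combined logs and summary.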
def main():
    global num_servers
    global vm
    global dir
    global timestamp
    global tests
    global first_run_failures

    debug_level = logging.INFO

    if len(sys.argv) < 2:
        sys.exit("Usage: %s <number of VMs> [--debug] [--codecov] [params..]" % sys.argv[0])
    num_servers = int(sys.argv[1])
    if num_servers < 1:
        sys.exit("Too small number of VMs")

    timestamp = int(time.time())

    idx = 2
    if len(sys.argv) > idx and sys.argv[idx] == "--debug":
        idx += 1
        debug_level = logging.DEBUG

    if len(sys.argv) > idx and sys.argv[idx] == "--codecov":
        idx += 1
        print "Code coverage - build separate binaries"
        logdir = "/tmp/hwsim-test-logs/" + str(timestamp)
        os.makedirs(logdir)
        subprocess.check_call(['./build-codecov.sh', logdir])
        codecov_args = ['--codecov_dir', logdir]
        codecov = True
    else:
        codecov_args = []
        codecov = False
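
    # Ask run-tests.py for the list of selected test case names; the VMs then
    # consume this list one test case at a time.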
    first_run_failures = []
    tests = []
    cmd = [ '../run-tests.py', '-L' ] + sys.argv[idx:]
    lst = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    for l in lst.stdout.readlines():
        name = l.split(' ')[0]
        tests.append(name)
    if len(tests) == 0:
        sys.exit("No test cases selected")

    if '-f' in sys.argv[idx:]:
        extra_args = sys.argv[idx:]
    else:
        extra_args = [x for x in sys.argv[idx:] if x not in tests]

    dir = '/tmp/hwsim-test-logs'
    try:
        os.mkdir(dir)
    except:
        pass

    if num_servers > 2 and len(tests) > 100:
        # Move test cases with long duration to the beginning as an
        # optimization to avoid last part of the test execution running a long
        # duration test case on a single VM while all other VMs have already
        # completed their work.
        long = [ "ap_roam_open",
                 "ap_hs20_fetch_osu_stop",
                 "ap_roam_wpa2_psk",
                 "ibss_wpa_none_ccmp",
                 "nfc_wps_er_handover_pk_hash_mismatch_sta",
                 "go_neg_peers_force_diff_freq",
                 "p2p_cli_invite",
                 "sta_ap_scan_2b",
                 "ap_pmf_sta_unprot_deauth_burst",
                 "ap_bss_add_remove_during_ht_scan",
                 "wext_scan_hidden",
                 "autoscan_exponential",
                 "nfc_p2p_client",
                 "wnm_bss_keep_alive",
                 "ap_inactivity_disconnect",
                 "scan_bss_expiration_age",
                 "autoscan_periodic",
                 "discovery_group_client",
                 "concurrent_p2pcli",
                 "ap_bss_add_remove",
                 "wpas_ap_wps",
                 "wext_pmksa_cache",
                 "ibss_wpa_none",
                 "ap_ht_40mhz_intolerant_ap",
                 "ibss_rsn",
                 "discovery_pd_retries",
                 "ap_wps_setup_locked_timeout",
                 "ap_vht160",
                 "dfs_radar",
                 "dfs",
                 "grpform_cred_ready_timeout",
                 "ap_wps_pbc_timeout" ]
        for l in long:
            if l in tests:
                tests.remove(l)
                tests.insert(0, l)
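
    # Log parallel-vm activity into parallel-vm.log in the current directory.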
    logger.setLevel(debug_level)
    log_handler = logging.FileHandler('parallel-vm.log')
    log_handler.setLevel(debug_level)
    fmt = "%(asctime)s %(levelname)s %(message)s"
    log_formatter = logging.Formatter(fmt)
    log_handler.setFormatter(log_formatter)
    logger.addHandler(log_handler)
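
    # Start the VMs with vm-run.sh and make their stdout/stderr pipes
    # non-blocking so the progress loop can poll them without stalling.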
    vm = {}
    for i in range(0, num_servers):
        print("\rStarting virtual machine {}/{}".format(i + 1, num_servers)),
        logger.info("Starting virtual machine {}/{}".format(i + 1, num_servers))
        cmd = ['./vm-run.sh', '--delay', str(i), '--timestamp', str(timestamp),
               '--ext', 'srv.%d' % (i + 1),
               '-i'] + codecov_args + extra_args
        vm[i] = {}
        vm[i]['first_run_done'] = False
        vm[i]['proc'] = subprocess.Popen(cmd,
                                         stdin=subprocess.PIPE,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE)
        vm[i]['out'] = ""
        vm[i]['pending'] = ""
        vm[i]['err'] = ""
        for stream in [ vm[i]['proc'].stdout, vm[i]['proc'].stderr ]:
            fd = stream.fileno()
            fl = fcntl.fcntl(fd, fcntl.F_GETFL)
            fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
    print

    curses.wrapper(show_progress)

    with open('{}/{}-parallel.log'.format(dir, timestamp), 'w') as f:
        for i in range(0, num_servers):
            f.write('VM {}\n{}\n{}\n'.format(i, vm[i]['out'], vm[i]['err']))

    (started, passed, failed, skipped) = get_results()

    if first_run_failures:
        print "Failed test cases:"
        for f in first_run_failures:
            print f,
            logger.info("Failed: " + f)
        print
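
    # double_failed ends up holding the test cases that failed on the first
    # run and again on the retry; first-run-only failures are removed below.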
    double_failed = []
    for f in failed:
        name = f.split(' ')[1]
        double_failed.append(name)
    for test in first_run_failures:
        double_failed.remove(test)
    if failed and not double_failed:
        print "All failed cases passed on retry"
        logger.info("All failed cases passed on retry")
    elif double_failed:
        print "Failed even on retry:"
        for f in double_failed:
            print f,
            logger.info("Failed on retry: " + f)
        print
    res = "TOTAL={} PASS={} FAIL={} SKIP={}".format(len(started), len(passed), len(failed), len(skipped))
    print(res)
    logger.info(res)
    print "Logs: " + dir + '/' + str(timestamp)
    logger.info("Logs: " + dir + '/' + str(timestamp))

    for i in range(0, num_servers):
        if len(vm[i]['pending']) > 0:
            logger.info("Unprocessed stdout from VM[%d]: '%s'" %
                        (i, vm[i]['pending']))
        log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
        with open(log, 'r') as f:
            if "Kernel panic" in f.read():
                print "Kernel panic in " + log
                logger.info("Kernel panic in " + log)

    if codecov:
        print "Code coverage - preparing report"
        for i in range(num_servers):
            subprocess.check_call(['./process-codecov.sh',
                                   logdir + ".srv.%d" % (i + 1),
                                   str(i)])
        subprocess.check_call(['./combine-codecov.sh', logdir])
        print "file://%s/index.html" % logdir
        logger.info("Code coverage report: file://%s/index.html" % logdir)
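
    # Exit status: 2 if some test case failed even on retry, 1 if failures
    # were seen only on the first run (and passed on retry), 0 if all passed.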
    if double_failed:
        logger.info("Test run complete - failures found")
        sys.exit(2)
    if failed:
        logger.info("Test run complete - failures found on first run; passed on retry")
        sys.exit(1)
    logger.info("Test run complete - no failures")
    sys.exit(0)


if __name__ == "__main__":
    main()