segment_mapper.py 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. # -*- coding: utf-8 -*-
  2. # Copyright (c) 2017-18 Richard Hull and contributors
  3. # See LICENSE.rst for details.
# Lookup table: one printable character -> the 8-bit segment pattern used to
# draw it on a 7-segment digit.
#
# * 0x80 is the pattern of both ',' and '.'; dot_muncher() ORs that bit into
#   the preceding character's pattern so the dot shares its digit.
# * NOTE(review): judging from the entries ('-' = 0x01, '_' = 0x08,
#   '1' = 0x30, '8' = 0x7f) the bit order looks like DP-A-B-C-D-E-F-G from
#   the most significant bit down -- confirm against the display driver.
# * There is no entry for '&'; it is handled like any other unmapped
#   character by the mapper functions (see their ``notfound`` argument).
# * Several characters deliberately share a pattern (e.g. '5', 'S', 's' and
#   '$' are all 0x5b) because a 7-segment digit cannot tell them apart.
_DIGITS = {
    ' ': 0x00,
    '!': 0xa0,
    '"': 0x22,
    '#': 0x3f,
    '$': 0x5b,
    '%': 0xa5,
    "'": 0x02,
    '(': 0x4e,
    ')': 0x78,
    '*': 0x49,
    '+': 0x07,
    ',': 0x80,
    '-': 0x01,
    '.': 0x80,
    '/': 0x25,
    # Digits.
    '0': 0x7e,
    '1': 0x30,
    '2': 0x6d,
    '3': 0x79,
    '4': 0x33,
    '5': 0x5b,
    '6': 0x5f,
    '7': 0x70,
    '8': 0x7f,
    '9': 0x7b,
    ':': 0x48,
    ';': 0x58,
    '<': 0x0d,
    '=': 0x09,
    '>': 0x19,
    '?': 0xe5,
    '@': 0x6f,
    # Upper-case letters.
    'A': 0x77,
    'B': 0x7f,
    'C': 0x4e,
    'D': 0x7e,
    'E': 0x4f,
    'F': 0x47,
    'G': 0x5e,
    'H': 0x37,
    'I': 0x30,
    'J': 0x38,
    'K': 0x57,
    'L': 0x0e,
    'M': 0x54,
    'N': 0x76,
    'O': 0x7e,
    'P': 0x67,
    'Q': 0x73,
    'R': 0x46,
    'S': 0x5b,
    'T': 0x0f,
    'U': 0x3e,
    'V': 0x3e,
    'W': 0x2a,
    'X': 0x37,
    'Y': 0x3b,
    'Z': 0x6d,
    '[': 0x43,
    '\\': 0x13,
    ']': 0x61,
    '^': 0x62,
    '_': 0x08,
    '`': 0x20,
    # Lower-case letters.
    'a': 0x7d,
    'b': 0x1f,
    'c': 0x0d,
    'd': 0x3d,
    'e': 0x6f,
    'f': 0x47,
    'g': 0x7b,
    'h': 0x17,
    'i': 0x10,
    'j': 0x18,
    'k': 0x57,
    'l': 0x06,
    'm': 0x14,
    'n': 0x15,
    'o': 0x1d,
    'p': 0x67,
    'q': 0x73,
    'r': 0x05,
    's': 0x5b,
    't': 0x0f,
    'u': 0x1c,
    'v': 0x1c,
    'w': 0x14,
    'x': 0x37,
    'y': 0x3b,
    'z': 0x6d,
    '{': 0x31,
    '|': 0x06,
    '}': 0x07,
    '~': 0x40,
    # Degree sign, spelled both literally and as an escape.
    # NOTE(review): these two keys look like the same code point (U+00B0), in
    # which case the second entry simply overwrites the first with an
    # identical value -- confirm before removing either.
    u'°': 0x63,
    u'\xb0': 0x63,
}
  102. def regular(text, notfound="_"):
  103. undefined = _DIGITS[notfound] if notfound is not None else None
  104. for char in iter(text):
  105. digit = _DIGITS.get(char, undefined)
  106. if digit is not None:
  107. yield digit
  108. def dot_muncher(text, notfound="_"):
  109. if not text:
  110. return
  111. undefined = _DIGITS[notfound] if notfound is not None else None
  112. last = None
  113. for char in iter(text):
  114. curr = _DIGITS.get(char, undefined)
  115. if curr == 0x80:
  116. yield curr | (last or 0)
  117. elif last != 0x80 and last is not None:
  118. yield last
  119. last = curr
  120. if curr != 0x80 and curr is not None:
  121. yield curr