Refactor text generator

2018-08-02 11:26:21 -04:00 · 2018-08-02 11:26:21 -04:00 · 21a6025481
commit 21a6025481
parent b2206ba0b2
1 changed files with 240 additions and 149 deletions
--- a/lib/make/text.js
+++ b/lib/make/text.js
@ -7,18 +7,66 @@ const make = require('../make')
 const utils = require('../utils')

 class text extends make {
-  static lineEnd () {
+  /**
+   * Generate a random ASCII letter (A-Z or a-z); a raw 'A'..'z' code range would also yield [ \ ] ^ _ `
+   */
+  static alpha () {
+    return random.item([...'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'])
+  }
+
+  /**
+   * Generate a random ASCII letter or digit; a raw '0'..'z' code range would also yield punctuation such as : ; < = > ? @
+   */
+  static alphanum () {
+    return random.item([...'0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'])
+  }
+
+  /**
+   * Generate a random assignment operator
+   */
+  static assignmentOperator () {
    return random.pick([
-      '\n', '\r', '\r\n', '\n\r'
+      '=', '+=', '-=', '*=', '/=', '%=', '**=', '<<=', '>>=', '>>>=', '&=', '^=', '|='
    ])
  }

+  /**
+   * Pick one binary arithmetic operator: remainder, minus, plus, times or divide
+   */
+  static arithmeticOperator () {
+    const operators = ['%', '-', '+', '*', '/']
+
+    return random.pick(operators)
+  }
+
+  /**
+   * Generate a random control character
+   */
  static controlChar () {
    return random.pick([
      '\b', '\t', '\n', '\v', '\f', '\r', '\0', '\c', '\a', '\e' // eslint-disable-line no-useless-escape
    ])
  }

+  /**
+   * Produce one random decimal digit character ('0' through '9')
+   */
+  static digit () {
+    return String.fromCharCode(random.range(0x30, 0x39)) // 0x30..0x39 are the code units of '0'..'9'
+  }
+
+  /**
+   * Pick one line terminator: LF, CR, CRLF or the reversed LFCR sequence
+   */
+  static lineEnd () {
+    const terminators = ['\n', '\r', '\r\n', '\n\r']
+
+    return random.pick(terminators)
+  }
+
+  /**
+   * Generate a random token
+   */
  static token () {
    return random.pick([
      '*', '+', '%', '-', '!', '^', ':', '|', '&', '<', '>', '.', '"',
@ -35,9 +83,9 @@ class text extends make {
  static language () {
    return random.pick([
      // special casing for i, I, dotted/dotless variants
-      ['tr', 'az'],
+      ['tr', 'az', 'crh', 'tt', 'ba'],
      // special casing rules: https://developer.mozilla.org/en/CSS/text-transform
-      ['nl', 'gr'],
+      ['nl', 'el', 'ga'],
      // special justification rules
      ['ja', 'zh'],
      // tend to be RTL
@ -49,155 +97,176 @@ class text extends make {
    ])
  }

+  /**
+   * Generate a random character that may affect layout
+   */
  static layoutCharCodes () {
-    return random.pick([
-      0, // null
-      160, // non-breaking space
-      0x005C, // backslash, but in some countries, represents local currency symbol (e.g. yen)
-      0x00AD, // soft hyphen
-      0x0BCC, // a Tamil character that is displayed as three glyphs
-      // http://unicode.org/charts/PDF/U2000.pdf
-      0x200B, // zero-width space
-      0x200C, // zero-width non-joiner
-      0x200D, // zero-width joiner
-      0x200E, // left-to-right mark
-      0x200F, // right-to-left mark
-      0x2011, // non-breaking hyphen
-      0x2027, // hyphenation point
-      0x2028, // line separator
-      0x2029, // paragraph separator
-      0x202A, // left-to-right embedding
-      0x202B, // right-to-left embedding
-      0x202C, // pop directional formatting
-      0x202D, // left-to-right override
-      0x202E, // right-to-left override
-      0x202F, // narrow no-break space
-      0x2060, // word joiner
-      0x2061, // function application (one of several invisible mathematical operators)
-      // http://unicode.org/charts/PDF/U3000.pdf
-      0x3000, // ideographic space (CJK)
-      // http://unicode.org/charts/PDF/U0300.pdf
-      0x0301, // combining acute accent (if it appears after "a", it turns into "a" with an accent)
-      // Arabic has the interesting property that most letters connect to the next letter.
-      // Some code calls this "shaping".
-      0x0643, // arabic letter kaf
-      0x0645, // arabic letter meem
-      0x06CD, // arabic letter yeh with tail
-      0xFDDE, // invalid unicode? but somehow associated with arabic.
-      // http://unicode.org/reports/tr36/tr36-7.html#Buffer_Overflows
-      // Characters with especially high expansion factors when they go through various unicode "normalizations"
-      0x1F82,
-      0xFDFA,
-      0xFB2C,
-      0x0390,
-      // 0x1D160, // hmm, need surrogates
-      // Characters with especially high expansion factors when lowercased or uppercased
-      0x023A,
-      0x0041,
-      0xDC1D, // a low surrogate
-      0xDB00, // a high surrogate
-      // UFFF0.pdf
-      0xFFF9, // interlinear annotation anchor
-      0xFFFA, // interlinear annotation seperator
-      0xFFFB, // interlinear annotation terminator
-      0xFFFC, // object replacement character
-      0xFFFD, // replacement character
-      0xFEFF, // zero width no-break space
-      0xFFFF, // not a character
-      0x00A0, // no-break space
-      0x2426,
-      0x003F,
-      0x00BF,
-      0xDC80,
-      0xDCFF,
-      // http://en.wikipedia.org/wiki/Mapping_of_Unicode_characters
-      0x205F, // mathematical space
-      0x2061, // mathematical function application
-      0x2064, // mathematical invisible separator
-      0x2044 // fraction slash character
-    ])
+    return String.fromCodePoint(
+      random.pick([
+        0, // null
+        160, // non-breaking space
+        0x005C, // backslash, but in some countries, represents local currency symbol (e.g. yen)
+        0x00AD, // soft hyphen
+        0x0BCC, // a Tamil character that is displayed as three glyphs
+        // http://unicode.org/charts/PDF/U2000.pdf
+        0x200B, // zero-width space
+        0x200C, // zero-width non-joiner
+        0x200D, // zero-width joiner
+        0x200E, // left-to-right mark
+        0x200F, // right-to-left mark
+        0x2011, // non-breaking hyphen
+        0x2027, // hyphenation point
+        0x2028, // line separator
+        0x2029, // paragraph separator
+        0x202A, // left-to-right embedding
+        0x202B, // right-to-left embedding
+        0x202C, // pop directional formatting
+        0x202D, // left-to-right override
+        0x202E, // right-to-left override
+        0x202F, // narrow no-break space
+        0x2060, // word joiner
+        0x2061, // function application (one of several invisible mathematical operators)
+        // http://unicode.org/charts/PDF/U3000.pdf
+        0x3000, // ideographic space (CJK)
+        // http://unicode.org/charts/PDF/U0300.pdf
+        0x0301, // combining acute accent (if it appears after "a", it turns into "a" with an accent)
+        // Arabic has the interesting property that most letters connect to the next letter.
+        // Some code calls this "shaping".
+        0x0643, // arabic letter kaf
+        0x0645, // arabic letter meem
+        0x06CD, // arabic letter yeh with tail
+        0xFDDE, // invalid unicode? but somehow associated with arabic.
+        // http://unicode.org/reports/tr36/tr36-7.html#Buffer_Overflows
+        // Characters with especially high expansion factors when they go through various unicode "normalizations"
+        0x1F82,
+        0xFDFA,
+        0xFB2C,
+        0x0390,
+        // 0x1D160, // hmm, need surrogates
+        // Characters with especially high expansion factors when lowercased or uppercased
+        0x023A,
+        0x0041,
+        0xDC1D, // a low surrogate
+        0xDB00, // a high surrogate
+        // UFFF0.pdf
+        0xFFF9, // interlinear annotation anchor
+        0xFFFA, // interlinear annotation separator
+        0xFFFB, // interlinear annotation terminator
+        0xFFFC, // object replacement character
+        0xFFFD, // replacement character
+        0xFEFF, // zero width no-break space
+        0xFFFF, // not a character
+        0x00A0, // no-break space
+        0x2426,
+        0x003F,
+        0x00BF,
+        0xDC80,
+        0xDCFF,
+        // http://en.wikipedia.org/wiki/Mapping_of_Unicode_characters
+        0x205F, // mathematical space
+        0x2061, // mathematical function application
+        0x2064, // mathematical invisible separator
+        0x2044 // fraction slash character
+      ])
+    )
  }

+  /**
+   * Generate a random character that affects bidi layout
+   */
  static bidiCharCodes () {
-    return random.pick([
-      0x0660, // START_HINDI_DIGITS
-      0x0669, // END_HINDI_DIGITS
-      0x066A, // START_ARABIC_SEPARATOR
-      0x066B, // END_ARABIC_SEPARATOR
-      0x0030, // START_ARABIC_DIGITS
-      0x0039, // END_ARABIC_DIGITS
-      0x06f0, // START_FARSI_DIGITS
-      0x06f9 // END_FARSI_DIGITS
-    ])
+    return String.fromCodePoint(
+      random.pick([
+        0x0660, // START_HINDI_DIGITS
+        0x0669, // END_HINDI_DIGITS
+        0x066A, // START_ARABIC_SEPARATOR
+        0x066B, // END_ARABIC_SEPARATOR
+        0x0030, // START_ARABIC_DIGITS
+        0x0039, // END_ARABIC_DIGITS
+        0x06f0, // START_FARSI_DIGITS
+        0x06f9 // END_FARSI_DIGITS
+      ])
+    )
  }

-  // http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt
-  static unicodeCombiningCharacters () {
-    return random.item([
-      [0x0300, 0x036F], // Combining Diacritical Marks
-      [0x0483, 0x0489],
-      [0x07EB, 0x07F3],
-      [0x135D, 0x135F],
-      [0x1A7F, 0x1A7F],
-      [0x1B6B, 0x1B73],
-      [0x1DC0, 0x1DFF], // Combining Diacritical Marks Supplement
-      [0x20D0, 0x2DFF],
-      [0x3099, 0x309A],
-      [0xA66F, 0xA6F1],
-      [0xA8E0, 0xA8F1],
-      [0xFE20, 0xFE26], // Combining Half Marks
-      [0x101FD, 0x101FD],
-      [0x1D165, 0x1D169],
-      [0x1D16D, 0x1D172],
-      [0x1D17B, 0x1D18B],
-      [0x1D1AA, 0x1D1AD],
-      [0x1D242, 0x1D244]
-    ])
+  /**
+   * Generate a random unicode combining character
+   * http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt
+   */
+  static unicodeCombiningCharacter () {
+    return String.fromCodePoint(
+      random.range(
+        ...random.item([
+          [0x0300, 0x036F], // Combining Diacritical Marks
+          [0x0483, 0x0489],
+          [0x07EB, 0x07F3],
+          [0x135D, 0x135F],
+          [0x1A7F, 0x1A7F],
+          [0x1B6B, 0x1B73],
+          [0x1DC0, 0x1DFF], // Combining Diacritical Marks Supplement
+          [0x20D0, 0x2DFF],
+          [0x3099, 0x309A],
+          [0xA66F, 0xA6F1],
+          [0xA8E0, 0xA8F1],
+          [0xFE20, 0xFE26], // Combining Half Marks
+          [0x101FD, 0x101FD],
+          [0x1D165, 0x1D169],
+          [0x1D16D, 0x1D172],
+          [0x1D17B, 0x1D18B],
+          [0x1D1AA, 0x1D1AD],
+          [0x1D242, 0x1D244]
+        ])
+      )
+    )
  }

+  /**
+   * Generate a random basic multilingual plane character
+   */
  static unicodeBMP () {
-    return random.item([
-      // BMP = Basic Multilingual Plane
-      [0x0000, 0xFFFF]
-    ])
+    return String.fromCodePoint(
+      random.range(0x0000, 0xFFFF)
+    )
  }

+  /**
+   * Generate a random supplementary multilingual plane character
+   */
  static unicodeSMP () {
-    return random.item([
-      // SMP = Supplementary Multilingual Plane
-      [0x10000, 0x13FFF],
-      [0x16000, 0x16FFF],
-      [0x1B000, 0x1BFFF],
-      [0x1D000, 0x1DFFF],
-      [0x1F000, 0x1FFFF]
-    ])
+    return String.fromCodePoint(
+      random.range(
+        ...random.item([
+          [0x10000, 0x13FFF],
+          [0x16000, 0x16FFF],
+          [0x1B000, 0x1BFFF],
+          [0x1D000, 0x1DFFF],
+          [0x1F000, 0x1FFFF]
+        ])
+      )
+    )
  }

+  /**
+   * Generate a random supplementary ideographic plane character
+   */
  static unicodeSIP () {
-    return random.item([
-      // SIP = Supplementary Ideographic Plane
-      [0x20000, 0x2BFFF],
-      [0x2F000, 0x2FFFF]
-    ])
+    return String.fromCodePoint(
+      random.range(
+        ...random.item([
+          [0x20000, 0x2BFFF],
+          [0x2F000, 0x2FFFF]
+        ])
+      )
+    )
  }

+  /**
+   * Generate a random supplementary special-purpose plane character
+   */
  static unicodeSSP () {
-    return random.item([
-      // SSP = Supplementary Special-purpose Plane
-      [0xE0000, 0xE0FFF]
-    ])
-  }
-
-  static assignmentOperator () {
-    return random.pick([
-      '=', '+=', '-=', '*=', '/=', '%=', '**=', '<<=', '>>=', '>>>=', '&=', '^=', '|='
-    ])
-  }
-
-  static arithmeticOperator () {
-    return random.pick([
-      '%', '-', '+', '*', '/'
-    ])
+    return String.fromCodePoint(
+      random.range(0xE0000, 0xE0FFF)
+    )
  }

  static currency () {
@ -222,26 +291,48 @@ class text extends make {
    return utils.common.quote(text.any())
  }

-  static chars () {
-    return random.pick([
-      text.controlChar,
-      text.token,
-      text.assignmentOperator,
-      text.arithmeticOperator,
-      String.fromCharCode(text.layoutCharCodes()),
-      String.fromCharCode(text.bidiCharCodes())
+  /**
+   * Wrapper for all text generators
+   * @returns {string}
+   */
+  static random () {
+    return random.choose([
+      [1, text.alpha],
+      [1, text.alphanum],
+      [1, text.arithmeticOperator],
+      [1, text.assignmentOperator],
+      [1, text.controlChar],
+      [1, text.digit],
+      [1, text.lineEnd],
+      [1, text.token],
+      [3, text.layoutCharCodes],
+      [3, text.bidiCharCodes],
+      [3, text.unicodeCombiningCharacter],
+      [3, text.unicodeBMP],
+      [3, text.unicodeSMP],
+      [3, text.unicodeSIP],
+      [3, text.unicodeSSP]
    ])
  }

+  /**
+   * Generate a single character: the first whole code point of a randomly generated string ('' if that string is empty)
+   */
+  static character () {
+    return Array.from(text.random())[0] || '' // Array.from walks code points; charAt(0) would split an astral character's surrogate pair
+  }
+
+  /**
+   * Generate a string composed of the output of randomly chosen text generators
+   */
  static any () {
-    // Generate a string compromised of random individual characters
-    // This might be too slow to used for all 'texts' uses
    let s = ''
-    // TODO: Len calculation take from DOMFuzz - maybe we should revise this?
-    let len = random.pick([make.number.tiny, make.number.range])
-    for (let i = 0; i < len; i++) {
-      s += make.text.chars()
+    let len = random.range(1, 126)
+
+    while (len--) {
+      s += make.text.random()
    }
+
    return s
  }
 }