From 21a6025481a3cda143c922a672847bf0afb42d54 Mon Sep 17 00:00:00 2001 From: pyoor Date: Thu, 2 Aug 2018 11:26:21 -0400 Subject: [PATCH] Refactor text generator --- lib/make/text.js | 389 +++++++++++++++++++++++++++++------------------ 1 file changed, 240 insertions(+), 149 deletions(-) diff --git a/lib/make/text.js b/lib/make/text.js index 2d4bbcc..bcdaf25 100644 --- a/lib/make/text.js +++ b/lib/make/text.js @@ -7,18 +7,66 @@ const make = require('../make') const utils = require('../utils') class text extends make { - static lineEnd () { + /** + * Generate a random alphabetic character + */ + static alpha () { + return String.fromCharCode(random.range('A'.charCodeAt(0), 'z'.charCodeAt(0))) + } + + /** + * Generate a random alphanumeric character + */ + static alphanum () { + return String.fromCharCode(random.range('0'.charCodeAt(0), 'z'.charCodeAt(0))) + } + + /** + * Generate a random assignment operator + */ + static assignmentOperator () { return random.pick([ - '\n', '\r', '\r\n', '\n\r' + '=', '+=', '-=', '*=', '/=', '%=', '**=', '<<=', '>>=', '>>>=', '&=', '^=', '|=' ]) } + /** + * Generate a random arithmetic operator + */ + static arithmeticOperator () { + return random.pick([ + '%', '-', '+', '*', '/' + ]) + } + + /** + * Generate a random control character + */ static controlChar () { return random.pick([ '\b', '\t', '\n', '\v', '\f', '\r', '\0', '\c', '\a', '\e' // eslint-disable-line no-useless-escape ]) } + /** + * Generate a random digit + */ + static digit () { + return String.fromCharCode(random.range('0'.charCodeAt(0), '9'.charCodeAt(0))) + } + + /** + * Generate a random line ending + */ + static lineEnd () { + return random.pick([ + '\n', '\r', '\r\n', '\n\r' + ]) + } + + /** + * Generate a random token + */ static token () { return random.pick([ '*', '+', '%', '-', '!', '^', ':', '|', '&', '<', '>', '.', '"', @@ -35,9 +83,9 @@ class text extends make { static language () { return random.pick([ // special casing for i, I, dotted/dotless variants - ['tr', 'az'], + ['tr', 'az', 'crh', 'tt', 'ba'], // special casing rules: https://developer.mozilla.org/en/CSS/text-transform - ['nl', 'gr'], + ['nl', 'el', 'ga'], // special justification rules ['ja', 'zh'], // tend to be RTL @@ -49,155 +97,176 @@ class text extends make { ]) } + /** + * Generate a random character that may affect layout + */ static layoutCharCodes () { - return random.pick([ - 0, // null - 160, // non-breaking space - 0x005C, // backslash, but in some countries, represents local currency symbol (e.g. yen) - 0x00AD, // soft hyphen - 0x0BCC, // a Tamil character that is displayed as three glyphs - // http://unicode.org/charts/PDF/U2000.pdf - 0x200B, // zero-width space - 0x200C, // zero-width non-joiner - 0x200D, // zero-width joiner - 0x200E, // left-to-right mark - 0x200F, // right-to-left mark - 0x2011, // non-breaking hyphen - 0x2027, // hyphenation point - 0x2028, // line separator - 0x2029, // paragraph separator - 0x202A, // left-to-right embedding - 0x202B, // right-to-left embedding - 0x202C, // pop directional formatting - 0x202D, // left-to-right override - 0x202E, // right-to-left override - 0x202F, // narrow no-break space - 0x2060, // word joiner - 0x2061, // function application (one of several invisible mathematical operators) - // http://unicode.org/charts/PDF/U3000.pdf - 0x3000, // ideographic space (CJK) - // http://unicode.org/charts/PDF/U0300.pdf - 0x0301, // combining acute accent (if it appears after "a", it turns into "a" with an accent) - // Arabic has the interesting property that most letters connect to the next letter. - // Some code calls this "shaping". - 0x0643, // arabic letter kaf - 0x0645, // arabic letter meem - 0x06CD, // arabic letter yeh with tail - 0xFDDE, // invalid unicode? but somehow associated with arabic. - // http://unicode.org/reports/tr36/tr36-7.html#Buffer_Overflows - // Characters with especially high expansion factors when they go through various unicode "normalizations" - 0x1F82, - 0xFDFA, - 0xFB2C, - 0x0390, - // 0x1D160, // hmm, need surrogates - // Characters with especially high expansion factors when lowercased or uppercased - 0x023A, - 0x0041, - 0xDC1D, // a low surrogate - 0xDB00, // a high surrogate - // UFFF0.pdf - 0xFFF9, // interlinear annotation anchor - 0xFFFA, // interlinear annotation seperator - 0xFFFB, // interlinear annotation terminator - 0xFFFC, // object replacement character - 0xFFFD, // replacement character - 0xFEFF, // zero width no-break space - 0xFFFF, // not a character - 0x00A0, // no-break space - 0x2426, - 0x003F, - 0x00BF, - 0xDC80, - 0xDCFF, - // http://en.wikipedia.org/wiki/Mapping_of_Unicode_characters - 0x205F, // mathematical space - 0x2061, // mathematical function application - 0x2064, // mathematical invisible separator - 0x2044 // fraction slash character - ]) + return String.fromCodePoint( + random.pick([ + 0, // null + 160, // non-breaking space + 0x005C, // backslash, but in some countries, represents local currency symbol (e.g. yen) + 0x00AD, // soft hyphen + 0x0BCC, // a Tamil character that is displayed as three glyphs + // http://unicode.org/charts/PDF/U2000.pdf + 0x200B, // zero-width space + 0x200C, // zero-width non-joiner + 0x200D, // zero-width joiner + 0x200E, // left-to-right mark + 0x200F, // right-to-left mark + 0x2011, // non-breaking hyphen + 0x2027, // hyphenation point + 0x2028, // line separator + 0x2029, // paragraph separator + 0x202A, // left-to-right embedding + 0x202B, // right-to-left embedding + 0x202C, // pop directional formatting + 0x202D, // left-to-right override + 0x202E, // right-to-left override + 0x202F, // narrow no-break space + 0x2060, // word joiner + 0x2061, // function application (one of several invisible mathematical operators) + // http://unicode.org/charts/PDF/U3000.pdf + 0x3000, // ideographic space (CJK) + // http://unicode.org/charts/PDF/U0300.pdf + 0x0301, // combining acute accent (if it appears after "a", it turns into "a" with an accent) + // Arabic has the interesting property that most letters connect to the next letter. + // Some code calls this "shaping". + 0x0643, // arabic letter kaf + 0x0645, // arabic letter meem + 0x06CD, // arabic letter yeh with tail + 0xFDDE, // invalid unicode? but somehow associated with arabic. + // http://unicode.org/reports/tr36/tr36-7.html#Buffer_Overflows + // Characters with especially high expansion factors when they go through various unicode "normalizations" + 0x1F82, + 0xFDFA, + 0xFB2C, + 0x0390, + // 0x1D160, // hmm, need surrogates + // Characters with especially high expansion factors when lowercased or uppercased + 0x023A, + 0x0041, + 0xDC1D, // a low surrogate + 0xDB00, // a high surrogate + // UFFF0.pdf + 0xFFF9, // interlinear annotation anchor + 0xFFFA, // interlinear annotation seperator + 0xFFFB, // interlinear annotation terminator + 0xFFFC, // object replacement character + 0xFFFD, // replacement character + 0xFEFF, // zero width no-break space + 0xFFFF, // not a character + 0x00A0, // no-break space + 0x2426, + 0x003F, + 0x00BF, + 0xDC80, + 0xDCFF, + // http://en.wikipedia.org/wiki/Mapping_of_Unicode_characters + 0x205F, // mathematical space + 0x2061, // mathematical function application + 0x2064, // mathematical invisible separator + 0x2044 // fraction slash character + ]) + ) } + /** + * Generate a random character that affects bidi layout + */ static bidiCharCodes () { - return random.pick([ - 0x0660, // START_HINDI_DIGITS - 0x0669, // END_HINDI_DIGITS - 0x066A, // START_ARABIC_SEPARATOR - 0x066B, // END_ARABIC_SEPARATOR - 0x0030, // START_ARABIC_DIGITS - 0x0039, // END_ARABIC_DIGITS - 0x06f0, // START_FARSI_DIGITS - 0x06f9 // END_FARSI_DIGITS - ]) + return String.fromCodePoint( + random.pick([ + 0x0660, // START_HINDI_DIGITS + 0x0669, // END_HINDI_DIGITS + 0x066A, // START_ARABIC_SEPARATOR + 0x066B, // END_ARABIC_SEPARATOR + 0x0030, // START_ARABIC_DIGITS + 0x0039, // END_ARABIC_DIGITS + 0x06f0, // START_FARSI_DIGITS + 0x06f9 // END_FARSI_DIGITS + ]) + ) } - // http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt - static unicodeCombiningCharacters () { - return random.item([ - [0x0300, 0x036F], // Combining Diacritical Marks - [0x0483, 0x0489], - [0x07EB, 0x07F3], - [0x135D, 0x135F], - [0x1A7F, 0x1A7F], - [0x1B6B, 0x1B73], - [0x1DC0, 0x1DFF], // Combining Diacritical Marks Supplement - [0x20D0, 0x2DFF], - [0x3099, 0x309A], - [0xA66F, 0xA6F1], - [0xA8E0, 0xA8F1], - [0xFE20, 0xFE26], // Combining Half Marks - [0x101FD, 0x101FD], - [0x1D165, 0x1D169], - [0x1D16D, 0x1D172], - [0x1D17B, 0x1D18B], - [0x1D1AA, 0x1D1AD], - [0x1D242, 0x1D244] - ]) + /** + * Generate a random unicode combining character + * http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt + */ + static unicodeCombiningCharacter () { + return String.fromCodePoint( + random.range( + ...random.item([ + [0x0300, 0x036F], // Combining Diacritical Marks + [0x0483, 0x0489], + [0x07EB, 0x07F3], + [0x135D, 0x135F], + [0x1A7F, 0x1A7F], + [0x1B6B, 0x1B73], + [0x1DC0, 0x1DFF], // Combining Diacritical Marks Supplement + [0x20D0, 0x2DFF], + [0x3099, 0x309A], + [0xA66F, 0xA6F1], + [0xA8E0, 0xA8F1], + [0xFE20, 0xFE26], // Combining Half Marks + [0x101FD, 0x101FD], + [0x1D165, 0x1D169], + [0x1D16D, 0x1D172], + [0x1D17B, 0x1D18B], + [0x1D1AA, 0x1D1AD], + [0x1D242, 0x1D244] + ]) + ) + ) } + /** + * Generate a random basic multilingual plane character + */ static unicodeBMP () { - return random.item([ - // BMP = Basic Multilingual Plane - [0x0000, 0xFFFF] - ]) + return String.fromCodePoint( + random.range(0x0000, 0xFFFF) + ) } + /** + * Generate a random supplementary multilingual plane character + */ static unicodeSMP () { - return random.item([ - // SMP = Supplementary Multilingual Plane - [0x10000, 0x13FFF], - [0x16000, 0x16FFF], - [0x1B000, 0x1BFFF], - [0x1D000, 0x1DFFF], - [0x1F000, 0x1FFFF] - ]) + return String.fromCodePoint( + random.range( + ...random.item([ + [0x10000, 0x13FFF], + [0x16000, 0x16FFF], + [0x1B000, 0x1BFFF], + [0x1D000, 0x1DFFF], + [0x1F000, 0x1FFFF] + ]) + ) + ) } + /** + * Generate a random supplementary ideographic plane character + */ static unicodeSIP () { - return random.item([ - // SIP = Supplementary Ideographic Plane - [0x20000, 0x2BFFF], - [0x2F000, 0x2FFFF] - ]) + return String.fromCodePoint( + random.range( + ...random.item([ + [0x20000, 0x2BFFF], + [0x2F000, 0x2FFFF] + ]) + ) + ) } + /** + * Generate a random supplementary special-purpose plane character + */ static unicodeSSP () { - return random.item([ - // SSP = Supplementary Special-purpose Plane - [0xE0000, 0xE0FFF] - ]) - } - - static assignmentOperator () { - return random.pick([ - '=', '+=', '-=', '*=', '/=', '%=', '**=', '<<=', '>>=', '>>>=', '&=', '^=', '|=' - ]) - } - - static arithmeticOperator () { - return random.pick([ - '%', '-', '+', '*', '/' - ]) + return String.fromCodePoint( + random.range(0xE0000, 0xE0FFF) + ) } static currency () { @@ -222,26 +291,48 @@ class text extends make { return utils.common.quote(text.any()) } - static chars () { - return random.pick([ - text.controlChar, - text.token, - text.assignmentOperator, - text.arithmeticOperator, - String.fromCharCode(text.layoutCharCodes()), - String.fromCharCode(text.bidiCharCodes()) + /** + * Wrapper for all text generators + * @returns {string} + */ + static random () { + return random.choose([ + [1, text.alpha], + [1, text.alphanum], + [1, text.arithmeticOperator], + [1, text.assignmentOperator], + [1, text.controlChar], + [1, text.digit], + [1, text.lineEnd], + [1, text.token], + [3, text.layoutCharCodes], + [3, text.bidiCharCodes], + [3, text.unicodeCombiningCharacter], + [3, text.unicodeBMP], + [3, text.unicodeSMP], + [3, text.unicodeSIP], + [3, text.unicodeSSP] ]) } + /** + * Generate a single character + */ + static character () { + return text.random().charAt(0) + } + + /** + * Generate string comprised of random generators + */ static any () { - // Generate a string compromised of random individual characters - // This might be too slow to used for all 'texts' uses let s = '' - // TODO: Len calculation take from DOMFuzz - maybe we should revise this? - let len = random.pick([make.number.tiny, make.number.range]) - for (let i = 0; i < len; i++) { - s += make.text.chars() + let len = random.range(1, 126) + + while (len--) { + s += make.text.random() } + return s } }