Refactor text generator

This commit is contained in:
pyoor 2018-08-02 11:26:21 -04:00
parent b2206ba0b2
commit 21a6025481
1 changed files with 240 additions and 149 deletions

View File

@ -7,18 +7,66 @@ const make = require('../make')
const utils = require('../utils') const utils = require('../utils')
class text extends make { class text extends make {
static lineEnd () { /**
* Generate a random alphabetic character
*/
static alpha () {
return String.fromCharCode(random.range('A'.charCodeAt(0), 'z'.charCodeAt(0)))
}
/**
* Generate a random alphanumeric character
*/
static alphanum () {
return String.fromCharCode(random.range('0'.charCodeAt(0), 'z'.charCodeAt(0)))
}
/**
* Generate a random assignment operator
*/
static assignmentOperator () {
return random.pick([ return random.pick([
'\n', '\r', '\r\n', '\n\r' '=', '+=', '-=', '*=', '/=', '%=', '**=', '<<=', '>>=', '>>>=', '&=', '^=', '|='
]) ])
} }
/**
* Generate a random arithmetic operator
*/
static arithmeticOperator () {
return random.pick([
'%', '-', '+', '*', '/'
])
}
/**
* Generate a random control character
*/
static controlChar () { static controlChar () {
return random.pick([ return random.pick([
'\b', '\t', '\n', '\v', '\f', '\r', '\0', '\c', '\a', '\e' // eslint-disable-line no-useless-escape '\b', '\t', '\n', '\v', '\f', '\r', '\0', '\c', '\a', '\e' // eslint-disable-line no-useless-escape
]) ])
} }
/**
* Generate a random digit
*/
static digit () {
return String.fromCharCode(random.range('0'.charCodeAt(0), '9'.charCodeAt(0)))
}
/**
* Generate a random line ending
*/
static lineEnd () {
return random.pick([
'\n', '\r', '\r\n', '\n\r'
])
}
/**
* Generate a random token
*/
static token () { static token () {
return random.pick([ return random.pick([
'*', '+', '%', '-', '!', '^', ':', '|', '&', '<', '>', '.', '"', '*', '+', '%', '-', '!', '^', ':', '|', '&', '<', '>', '.', '"',
@ -35,9 +83,9 @@ class text extends make {
static language () { static language () {
return random.pick([ return random.pick([
// special casing for i, I, dotted/dotless variants // special casing for i, I, dotted/dotless variants
['tr', 'az'], ['tr', 'az', 'crh', 'tt', 'ba'],
// special casing rules: https://developer.mozilla.org/en/CSS/text-transform // special casing rules: https://developer.mozilla.org/en/CSS/text-transform
['nl', 'gr'], ['nl', 'el', 'ga'],
// special justification rules // special justification rules
['ja', 'zh'], ['ja', 'zh'],
// tend to be RTL // tend to be RTL
@ -49,155 +97,176 @@ class text extends make {
]) ])
} }
/**
* Generate a random character that may affect layout
*/
static layoutCharCodes () { static layoutCharCodes () {
return random.pick([ return String.fromCodePoint(
0, // null random.pick([
160, // non-breaking space 0, // null
0x005C, // backslash, but in some countries, represents local currency symbol (e.g. yen) 160, // non-breaking space
0x00AD, // soft hyphen 0x005C, // backslash, but in some countries, represents local currency symbol (e.g. yen)
0x0BCC, // a Tamil character that is displayed as three glyphs 0x00AD, // soft hyphen
// http://unicode.org/charts/PDF/U2000.pdf 0x0BCC, // a Tamil character that is displayed as three glyphs
0x200B, // zero-width space // http://unicode.org/charts/PDF/U2000.pdf
0x200C, // zero-width non-joiner 0x200B, // zero-width space
0x200D, // zero-width joiner 0x200C, // zero-width non-joiner
0x200E, // left-to-right mark 0x200D, // zero-width joiner
0x200F, // right-to-left mark 0x200E, // left-to-right mark
0x2011, // non-breaking hyphen 0x200F, // right-to-left mark
0x2027, // hyphenation point 0x2011, // non-breaking hyphen
0x2028, // line separator 0x2027, // hyphenation point
0x2029, // paragraph separator 0x2028, // line separator
0x202A, // left-to-right embedding 0x2029, // paragraph separator
0x202B, // right-to-left embedding 0x202A, // left-to-right embedding
0x202C, // pop directional formatting 0x202B, // right-to-left embedding
0x202D, // left-to-right override 0x202C, // pop directional formatting
0x202E, // right-to-left override 0x202D, // left-to-right override
0x202F, // narrow no-break space 0x202E, // right-to-left override
0x2060, // word joiner 0x202F, // narrow no-break space
0x2061, // function application (one of several invisible mathematical operators) 0x2060, // word joiner
// http://unicode.org/charts/PDF/U3000.pdf 0x2061, // function application (one of several invisible mathematical operators)
0x3000, // ideographic space (CJK) // http://unicode.org/charts/PDF/U3000.pdf
// http://unicode.org/charts/PDF/U0300.pdf 0x3000, // ideographic space (CJK)
0x0301, // combining acute accent (if it appears after "a", it turns into "a" with an accent) // http://unicode.org/charts/PDF/U0300.pdf
// Arabic has the interesting property that most letters connect to the next letter. 0x0301, // combining acute accent (if it appears after "a", it turns into "a" with an accent)
// Some code calls this "shaping". // Arabic has the interesting property that most letters connect to the next letter.
0x0643, // arabic letter kaf // Some code calls this "shaping".
0x0645, // arabic letter meem 0x0643, // arabic letter kaf
0x06CD, // arabic letter yeh with tail 0x0645, // arabic letter meem
0xFDDE, // invalid unicode? but somehow associated with arabic. 0x06CD, // arabic letter yeh with tail
// http://unicode.org/reports/tr36/tr36-7.html#Buffer_Overflows 0xFDDE, // invalid unicode? but somehow associated with arabic.
// Characters with especially high expansion factors when they go through various unicode "normalizations" // http://unicode.org/reports/tr36/tr36-7.html#Buffer_Overflows
0x1F82, // Characters with especially high expansion factors when they go through various unicode "normalizations"
0xFDFA, 0x1F82,
0xFB2C, 0xFDFA,
0x0390, 0xFB2C,
// 0x1D160, // hmm, need surrogates 0x0390,
// Characters with especially high expansion factors when lowercased or uppercased // 0x1D160, // hmm, need surrogates
0x023A, // Characters with especially high expansion factors when lowercased or uppercased
0x0041, 0x023A,
0xDC1D, // a low surrogate 0x0041,
0xDB00, // a high surrogate 0xDC1D, // a low surrogate
// UFFF0.pdf 0xDB00, // a high surrogate
0xFFF9, // interlinear annotation anchor // UFFF0.pdf
0xFFFA, // interlinear annotation seperator 0xFFF9, // interlinear annotation anchor
0xFFFB, // interlinear annotation terminator 0xFFFA, // interlinear annotation seperator
0xFFFC, // object replacement character 0xFFFB, // interlinear annotation terminator
0xFFFD, // replacement character 0xFFFC, // object replacement character
0xFEFF, // zero width no-break space 0xFFFD, // replacement character
0xFFFF, // not a character 0xFEFF, // zero width no-break space
0x00A0, // no-break space 0xFFFF, // not a character
0x2426, 0x00A0, // no-break space
0x003F, 0x2426,
0x00BF, 0x003F,
0xDC80, 0x00BF,
0xDCFF, 0xDC80,
// http://en.wikipedia.org/wiki/Mapping_of_Unicode_characters 0xDCFF,
0x205F, // mathematical space // http://en.wikipedia.org/wiki/Mapping_of_Unicode_characters
0x2061, // mathematical function application 0x205F, // mathematical space
0x2064, // mathematical invisible separator 0x2061, // mathematical function application
0x2044 // fraction slash character 0x2064, // mathematical invisible separator
]) 0x2044 // fraction slash character
])
)
} }
/**
* Generate a random character that affects bidi layout
*/
static bidiCharCodes () { static bidiCharCodes () {
return random.pick([ return String.fromCodePoint(
0x0660, // START_HINDI_DIGITS random.pick([
0x0669, // END_HINDI_DIGITS 0x0660, // START_HINDI_DIGITS
0x066A, // START_ARABIC_SEPARATOR 0x0669, // END_HINDI_DIGITS
0x066B, // END_ARABIC_SEPARATOR 0x066A, // START_ARABIC_SEPARATOR
0x0030, // START_ARABIC_DIGITS 0x066B, // END_ARABIC_SEPARATOR
0x0039, // END_ARABIC_DIGITS 0x0030, // START_ARABIC_DIGITS
0x06f0, // START_FARSI_DIGITS 0x0039, // END_ARABIC_DIGITS
0x06f9 // END_FARSI_DIGITS 0x06f0, // START_FARSI_DIGITS
]) 0x06f9 // END_FARSI_DIGITS
])
)
} }
// http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt /**
static unicodeCombiningCharacters () { * Generate a random unicode combining character
return random.item([ * http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt
[0x0300, 0x036F], // Combining Diacritical Marks */
[0x0483, 0x0489], static unicodeCombiningCharacter () {
[0x07EB, 0x07F3], return String.fromCodePoint(
[0x135D, 0x135F], random.range(
[0x1A7F, 0x1A7F], ...random.item([
[0x1B6B, 0x1B73], [0x0300, 0x036F], // Combining Diacritical Marks
[0x1DC0, 0x1DFF], // Combining Diacritical Marks Supplement [0x0483, 0x0489],
[0x20D0, 0x2DFF], [0x07EB, 0x07F3],
[0x3099, 0x309A], [0x135D, 0x135F],
[0xA66F, 0xA6F1], [0x1A7F, 0x1A7F],
[0xA8E0, 0xA8F1], [0x1B6B, 0x1B73],
[0xFE20, 0xFE26], // Combining Half Marks [0x1DC0, 0x1DFF], // Combining Diacritical Marks Supplement
[0x101FD, 0x101FD], [0x20D0, 0x2DFF],
[0x1D165, 0x1D169], [0x3099, 0x309A],
[0x1D16D, 0x1D172], [0xA66F, 0xA6F1],
[0x1D17B, 0x1D18B], [0xA8E0, 0xA8F1],
[0x1D1AA, 0x1D1AD], [0xFE20, 0xFE26], // Combining Half Marks
[0x1D242, 0x1D244] [0x101FD, 0x101FD],
]) [0x1D165, 0x1D169],
[0x1D16D, 0x1D172],
[0x1D17B, 0x1D18B],
[0x1D1AA, 0x1D1AD],
[0x1D242, 0x1D244]
])
)
)
} }
/**
* Generate a random basic multilingual plane character
*/
static unicodeBMP () { static unicodeBMP () {
return random.item([ return String.fromCodePoint(
// BMP = Basic Multilingual Plane random.range(0x0000, 0xFFFF)
[0x0000, 0xFFFF] )
])
} }
/**
* Generate a random supplementary multilingual plane character
*/
static unicodeSMP () { static unicodeSMP () {
return random.item([ return String.fromCodePoint(
// SMP = Supplementary Multilingual Plane random.range(
[0x10000, 0x13FFF], ...random.item([
[0x16000, 0x16FFF], [0x10000, 0x13FFF],
[0x1B000, 0x1BFFF], [0x16000, 0x16FFF],
[0x1D000, 0x1DFFF], [0x1B000, 0x1BFFF],
[0x1F000, 0x1FFFF] [0x1D000, 0x1DFFF],
]) [0x1F000, 0x1FFFF]
])
)
)
} }
/**
* Generate a random supplementary ideographic plane character
*/
static unicodeSIP () { static unicodeSIP () {
return random.item([ return String.fromCodePoint(
// SIP = Supplementary Ideographic Plane random.range(
[0x20000, 0x2BFFF], ...random.item([
[0x2F000, 0x2FFFF] [0x20000, 0x2BFFF],
]) [0x2F000, 0x2FFFF]
])
)
)
} }
/**
* Generate a random supplementary special-purpose plane character
*/
static unicodeSSP () { static unicodeSSP () {
return random.item([ return String.fromCodePoint(
// SSP = Supplementary Special-purpose Plane random.range(0xE0000, 0xE0FFF)
[0xE0000, 0xE0FFF] )
])
}
static assignmentOperator () {
return random.pick([
'=', '+=', '-=', '*=', '/=', '%=', '**=', '<<=', '>>=', '>>>=', '&=', '^=', '|='
])
}
static arithmeticOperator () {
return random.pick([
'%', '-', '+', '*', '/'
])
} }
static currency () { static currency () {
@ -222,26 +291,48 @@ class text extends make {
return utils.common.quote(text.any()) return utils.common.quote(text.any())
} }
static chars () { /**
return random.pick([ * Wrapper for all text generators
text.controlChar, * @returns {string}
text.token, */
text.assignmentOperator, static random () {
text.arithmeticOperator, return random.choose([
String.fromCharCode(text.layoutCharCodes()), [1, text.alpha],
String.fromCharCode(text.bidiCharCodes()) [1, text.alphanum],
[1, text.arithmeticOperator],
[1, text.assignmentOperator],
[1, text.controlChar],
[1, text.digit],
[1, text.lineEnd],
[1, text.token],
[3, text.layoutCharCodes],
[3, text.bidiCharCodes],
[3, text.unicodeCombiningCharacter],
[3, text.unicodeBMP],
[3, text.unicodeSMP],
[3, text.unicodeSIP],
[3, text.unicodeSSP]
]) ])
} }
/**
* Generate a single character
*/
static character () {
return text.random().charAt(0)
}
/**
* Generate string comprised of random generators
*/
static any () { static any () {
// Generate a string compromised of random individual characters
// This might be too slow to used for all 'texts' uses
let s = '' let s = ''
// TODO: Len calculation take from DOMFuzz - maybe we should revise this? let len = random.range(1, 126)
let len = random.pick([make.number.tiny, make.number.range])
for (let i = 0; i < len; i++) { while (len--) {
s += make.text.chars() s += make.text.random()
} }
return s return s
} }
} }