Refactor text generator
This commit is contained in:
parent
b2206ba0b2
commit
21a6025481
1 changed files with 240 additions and 149 deletions
389
lib/make/text.js
389
lib/make/text.js
|
@ -7,18 +7,66 @@ const make = require('../make')
|
||||||
const utils = require('../utils')
|
const utils = require('../utils')
|
||||||
|
|
||||||
class text extends make {
|
class text extends make {
|
||||||
static lineEnd () {
|
/**
|
||||||
|
* Generate a random alphabetic character
|
||||||
|
*/
|
||||||
|
static alpha () {
|
||||||
|
return String.fromCharCode(random.range('A'.charCodeAt(0), 'z'.charCodeAt(0)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a random alphanumeric character
|
||||||
|
*/
|
||||||
|
static alphanum () {
|
||||||
|
return String.fromCharCode(random.range('0'.charCodeAt(0), 'z'.charCodeAt(0)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a random assignment operator
|
||||||
|
*/
|
||||||
|
static assignmentOperator () {
|
||||||
return random.pick([
|
return random.pick([
|
||||||
'\n', '\r', '\r\n', '\n\r'
|
'=', '+=', '-=', '*=', '/=', '%=', '**=', '<<=', '>>=', '>>>=', '&=', '^=', '|='
|
||||||
])
|
])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a random arithmetic operator
|
||||||
|
*/
|
||||||
|
static arithmeticOperator () {
|
||||||
|
return random.pick([
|
||||||
|
'%', '-', '+', '*', '/'
|
||||||
|
])
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a random control character
|
||||||
|
*/
|
||||||
static controlChar () {
|
static controlChar () {
|
||||||
return random.pick([
|
return random.pick([
|
||||||
'\b', '\t', '\n', '\v', '\f', '\r', '\0', '\c', '\a', '\e' // eslint-disable-line no-useless-escape
|
'\b', '\t', '\n', '\v', '\f', '\r', '\0', '\c', '\a', '\e' // eslint-disable-line no-useless-escape
|
||||||
])
|
])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a random digit
|
||||||
|
*/
|
||||||
|
static digit () {
|
||||||
|
return String.fromCharCode(random.range('0'.charCodeAt(0), '9'.charCodeAt(0)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a random line ending
|
||||||
|
*/
|
||||||
|
static lineEnd () {
|
||||||
|
return random.pick([
|
||||||
|
'\n', '\r', '\r\n', '\n\r'
|
||||||
|
])
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a random token
|
||||||
|
*/
|
||||||
static token () {
|
static token () {
|
||||||
return random.pick([
|
return random.pick([
|
||||||
'*', '+', '%', '-', '!', '^', ':', '|', '&', '<', '>', '.', '"',
|
'*', '+', '%', '-', '!', '^', ':', '|', '&', '<', '>', '.', '"',
|
||||||
|
@ -35,9 +83,9 @@ class text extends make {
|
||||||
static language () {
|
static language () {
|
||||||
return random.pick([
|
return random.pick([
|
||||||
// special casing for i, I, dotted/dotless variants
|
// special casing for i, I, dotted/dotless variants
|
||||||
['tr', 'az'],
|
['tr', 'az', 'crh', 'tt', 'ba'],
|
||||||
// special casing rules: https://developer.mozilla.org/en/CSS/text-transform
|
// special casing rules: https://developer.mozilla.org/en/CSS/text-transform
|
||||||
['nl', 'gr'],
|
['nl', 'el', 'ga'],
|
||||||
// special justification rules
|
// special justification rules
|
||||||
['ja', 'zh'],
|
['ja', 'zh'],
|
||||||
// tend to be RTL
|
// tend to be RTL
|
||||||
|
@ -49,155 +97,176 @@ class text extends make {
|
||||||
])
|
])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a random character that may affect layout
|
||||||
|
*/
|
||||||
static layoutCharCodes () {
|
static layoutCharCodes () {
|
||||||
return random.pick([
|
return String.fromCodePoint(
|
||||||
0, // null
|
random.pick([
|
||||||
160, // non-breaking space
|
0, // null
|
||||||
0x005C, // backslash, but in some countries, represents local currency symbol (e.g. yen)
|
160, // non-breaking space
|
||||||
0x00AD, // soft hyphen
|
0x005C, // backslash, but in some countries, represents local currency symbol (e.g. yen)
|
||||||
0x0BCC, // a Tamil character that is displayed as three glyphs
|
0x00AD, // soft hyphen
|
||||||
// http://unicode.org/charts/PDF/U2000.pdf
|
0x0BCC, // a Tamil character that is displayed as three glyphs
|
||||||
0x200B, // zero-width space
|
// http://unicode.org/charts/PDF/U2000.pdf
|
||||||
0x200C, // zero-width non-joiner
|
0x200B, // zero-width space
|
||||||
0x200D, // zero-width joiner
|
0x200C, // zero-width non-joiner
|
||||||
0x200E, // left-to-right mark
|
0x200D, // zero-width joiner
|
||||||
0x200F, // right-to-left mark
|
0x200E, // left-to-right mark
|
||||||
0x2011, // non-breaking hyphen
|
0x200F, // right-to-left mark
|
||||||
0x2027, // hyphenation point
|
0x2011, // non-breaking hyphen
|
||||||
0x2028, // line separator
|
0x2027, // hyphenation point
|
||||||
0x2029, // paragraph separator
|
0x2028, // line separator
|
||||||
0x202A, // left-to-right embedding
|
0x2029, // paragraph separator
|
||||||
0x202B, // right-to-left embedding
|
0x202A, // left-to-right embedding
|
||||||
0x202C, // pop directional formatting
|
0x202B, // right-to-left embedding
|
||||||
0x202D, // left-to-right override
|
0x202C, // pop directional formatting
|
||||||
0x202E, // right-to-left override
|
0x202D, // left-to-right override
|
||||||
0x202F, // narrow no-break space
|
0x202E, // right-to-left override
|
||||||
0x2060, // word joiner
|
0x202F, // narrow no-break space
|
||||||
0x2061, // function application (one of several invisible mathematical operators)
|
0x2060, // word joiner
|
||||||
// http://unicode.org/charts/PDF/U3000.pdf
|
0x2061, // function application (one of several invisible mathematical operators)
|
||||||
0x3000, // ideographic space (CJK)
|
// http://unicode.org/charts/PDF/U3000.pdf
|
||||||
// http://unicode.org/charts/PDF/U0300.pdf
|
0x3000, // ideographic space (CJK)
|
||||||
0x0301, // combining acute accent (if it appears after "a", it turns into "a" with an accent)
|
// http://unicode.org/charts/PDF/U0300.pdf
|
||||||
// Arabic has the interesting property that most letters connect to the next letter.
|
0x0301, // combining acute accent (if it appears after "a", it turns into "a" with an accent)
|
||||||
// Some code calls this "shaping".
|
// Arabic has the interesting property that most letters connect to the next letter.
|
||||||
0x0643, // arabic letter kaf
|
// Some code calls this "shaping".
|
||||||
0x0645, // arabic letter meem
|
0x0643, // arabic letter kaf
|
||||||
0x06CD, // arabic letter yeh with tail
|
0x0645, // arabic letter meem
|
||||||
0xFDDE, // invalid unicode? but somehow associated with arabic.
|
0x06CD, // arabic letter yeh with tail
|
||||||
// http://unicode.org/reports/tr36/tr36-7.html#Buffer_Overflows
|
0xFDDE, // invalid unicode? but somehow associated with arabic.
|
||||||
// Characters with especially high expansion factors when they go through various unicode "normalizations"
|
// http://unicode.org/reports/tr36/tr36-7.html#Buffer_Overflows
|
||||||
0x1F82,
|
// Characters with especially high expansion factors when they go through various unicode "normalizations"
|
||||||
0xFDFA,
|
0x1F82,
|
||||||
0xFB2C,
|
0xFDFA,
|
||||||
0x0390,
|
0xFB2C,
|
||||||
// 0x1D160, // hmm, need surrogates
|
0x0390,
|
||||||
// Characters with especially high expansion factors when lowercased or uppercased
|
// 0x1D160, // hmm, need surrogates
|
||||||
0x023A,
|
// Characters with especially high expansion factors when lowercased or uppercased
|
||||||
0x0041,
|
0x023A,
|
||||||
0xDC1D, // a low surrogate
|
0x0041,
|
||||||
0xDB00, // a high surrogate
|
0xDC1D, // a low surrogate
|
||||||
// UFFF0.pdf
|
0xDB00, // a high surrogate
|
||||||
0xFFF9, // interlinear annotation anchor
|
// UFFF0.pdf
|
||||||
0xFFFA, // interlinear annotation separator
|
0xFFF9, // interlinear annotation anchor
|
||||||
0xFFFB, // interlinear annotation terminator
|
0xFFFA, // interlinear annotation separator
|
||||||
0xFFFC, // object replacement character
|
0xFFFB, // interlinear annotation terminator
|
||||||
0xFFFD, // replacement character
|
0xFFFC, // object replacement character
|
||||||
0xFEFF, // zero width no-break space
|
0xFFFD, // replacement character
|
||||||
0xFFFF, // not a character
|
0xFEFF, // zero width no-break space
|
||||||
0x00A0, // no-break space
|
0xFFFF, // not a character
|
||||||
0x2426,
|
0x00A0, // no-break space
|
||||||
0x003F,
|
0x2426,
|
||||||
0x00BF,
|
0x003F,
|
||||||
0xDC80,
|
0x00BF,
|
||||||
0xDCFF,
|
0xDC80,
|
||||||
// http://en.wikipedia.org/wiki/Mapping_of_Unicode_characters
|
0xDCFF,
|
||||||
0x205F, // mathematical space
|
// http://en.wikipedia.org/wiki/Mapping_of_Unicode_characters
|
||||||
0x2061, // mathematical function application
|
0x205F, // mathematical space
|
||||||
0x2064, // mathematical invisible separator
|
0x2061, // mathematical function application
|
||||||
0x2044 // fraction slash character
|
0x2064, // mathematical invisible separator
|
||||||
])
|
0x2044 // fraction slash character
|
||||||
|
])
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a random character that affects bidi layout
|
||||||
|
*/
|
||||||
static bidiCharCodes () {
|
static bidiCharCodes () {
|
||||||
return random.pick([
|
return String.fromCodePoint(
|
||||||
0x0660, // START_HINDI_DIGITS
|
random.pick([
|
||||||
0x0669, // END_HINDI_DIGITS
|
0x0660, // START_HINDI_DIGITS
|
||||||
0x066A, // START_ARABIC_SEPARATOR
|
0x0669, // END_HINDI_DIGITS
|
||||||
0x066B, // END_ARABIC_SEPARATOR
|
0x066A, // START_ARABIC_SEPARATOR
|
||||||
0x0030, // START_ARABIC_DIGITS
|
0x066B, // END_ARABIC_SEPARATOR
|
||||||
0x0039, // END_ARABIC_DIGITS
|
0x0030, // START_ARABIC_DIGITS
|
||||||
0x06f0, // START_FARSI_DIGITS
|
0x0039, // END_ARABIC_DIGITS
|
||||||
0x06f9 // END_FARSI_DIGITS
|
0x06f0, // START_FARSI_DIGITS
|
||||||
])
|
0x06f9 // END_FARSI_DIGITS
|
||||||
|
])
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt
|
/**
|
||||||
static unicodeCombiningCharacters () {
|
* Generate a random unicode combining character
|
||||||
return random.item([
|
* http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt
|
||||||
[0x0300, 0x036F], // Combining Diacritical Marks
|
*/
|
||||||
[0x0483, 0x0489],
|
static unicodeCombiningCharacter () {
|
||||||
[0x07EB, 0x07F3],
|
return String.fromCodePoint(
|
||||||
[0x135D, 0x135F],
|
random.range(
|
||||||
[0x1A7F, 0x1A7F],
|
...random.item([
|
||||||
[0x1B6B, 0x1B73],
|
[0x0300, 0x036F], // Combining Diacritical Marks
|
||||||
[0x1DC0, 0x1DFF], // Combining Diacritical Marks Supplement
|
[0x0483, 0x0489],
|
||||||
[0x20D0, 0x2DFF],
|
[0x07EB, 0x07F3],
|
||||||
[0x3099, 0x309A],
|
[0x135D, 0x135F],
|
||||||
[0xA66F, 0xA6F1],
|
[0x1A7F, 0x1A7F],
|
||||||
[0xA8E0, 0xA8F1],
|
[0x1B6B, 0x1B73],
|
||||||
[0xFE20, 0xFE26], // Combining Half Marks
|
[0x1DC0, 0x1DFF], // Combining Diacritical Marks Supplement
|
||||||
[0x101FD, 0x101FD],
|
[0x20D0, 0x2DFF],
|
||||||
[0x1D165, 0x1D169],
|
[0x3099, 0x309A],
|
||||||
[0x1D16D, 0x1D172],
|
[0xA66F, 0xA6F1],
|
||||||
[0x1D17B, 0x1D18B],
|
[0xA8E0, 0xA8F1],
|
||||||
[0x1D1AA, 0x1D1AD],
|
[0xFE20, 0xFE26], // Combining Half Marks
|
||||||
[0x1D242, 0x1D244]
|
[0x101FD, 0x101FD],
|
||||||
])
|
[0x1D165, 0x1D169],
|
||||||
|
[0x1D16D, 0x1D172],
|
||||||
|
[0x1D17B, 0x1D18B],
|
||||||
|
[0x1D1AA, 0x1D1AD],
|
||||||
|
[0x1D242, 0x1D244]
|
||||||
|
])
|
||||||
|
)
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a random basic multilingual plane character
|
||||||
|
*/
|
||||||
static unicodeBMP () {
|
static unicodeBMP () {
|
||||||
return random.item([
|
return String.fromCodePoint(
|
||||||
// BMP = Basic Multilingual Plane
|
random.range(0x0000, 0xFFFF)
|
||||||
[0x0000, 0xFFFF]
|
)
|
||||||
])
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a random supplementary multilingual plane character
|
||||||
|
*/
|
||||||
static unicodeSMP () {
|
static unicodeSMP () {
|
||||||
return random.item([
|
return String.fromCodePoint(
|
||||||
// SMP = Supplementary Multilingual Plane
|
random.range(
|
||||||
[0x10000, 0x13FFF],
|
...random.item([
|
||||||
[0x16000, 0x16FFF],
|
[0x10000, 0x13FFF],
|
||||||
[0x1B000, 0x1BFFF],
|
[0x16000, 0x16FFF],
|
||||||
[0x1D000, 0x1DFFF],
|
[0x1B000, 0x1BFFF],
|
||||||
[0x1F000, 0x1FFFF]
|
[0x1D000, 0x1DFFF],
|
||||||
])
|
[0x1F000, 0x1FFFF]
|
||||||
|
])
|
||||||
|
)
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a random supplementary ideographic plane character
|
||||||
|
*/
|
||||||
static unicodeSIP () {
|
static unicodeSIP () {
|
||||||
return random.item([
|
return String.fromCodePoint(
|
||||||
// SIP = Supplementary Ideographic Plane
|
random.range(
|
||||||
[0x20000, 0x2BFFF],
|
...random.item([
|
||||||
[0x2F000, 0x2FFFF]
|
[0x20000, 0x2BFFF],
|
||||||
])
|
[0x2F000, 0x2FFFF]
|
||||||
|
])
|
||||||
|
)
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a random supplementary special-purpose plane character
|
||||||
|
*/
|
||||||
static unicodeSSP () {
|
static unicodeSSP () {
|
||||||
return random.item([
|
return String.fromCodePoint(
|
||||||
// SSP = Supplementary Special-purpose Plane
|
random.range(0xE0000, 0xE0FFF)
|
||||||
[0xE0000, 0xE0FFF]
|
)
|
||||||
])
|
|
||||||
}
|
|
||||||
|
|
||||||
static assignmentOperator () {
|
|
||||||
return random.pick([
|
|
||||||
'=', '+=', '-=', '*=', '/=', '%=', '**=', '<<=', '>>=', '>>>=', '&=', '^=', '|='
|
|
||||||
])
|
|
||||||
}
|
|
||||||
|
|
||||||
static arithmeticOperator () {
|
|
||||||
return random.pick([
|
|
||||||
'%', '-', '+', '*', '/'
|
|
||||||
])
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static currency () {
|
static currency () {
|
||||||
|
@ -222,26 +291,48 @@ class text extends make {
|
||||||
return utils.common.quote(text.any())
|
return utils.common.quote(text.any())
|
||||||
}
|
}
|
||||||
|
|
||||||
static chars () {
|
/**
|
||||||
return random.pick([
|
* Wrapper for all text generators
|
||||||
text.controlChar,
|
* @returns {string}
|
||||||
text.token,
|
*/
|
||||||
text.assignmentOperator,
|
static random () {
|
||||||
text.arithmeticOperator,
|
return random.choose([
|
||||||
String.fromCharCode(text.layoutCharCodes()),
|
[1, text.alpha],
|
||||||
String.fromCharCode(text.bidiCharCodes())
|
[1, text.alphanum],
|
||||||
|
[1, text.arithmeticOperator],
|
||||||
|
[1, text.assignmentOperator],
|
||||||
|
[1, text.controlChar],
|
||||||
|
[1, text.digit],
|
||||||
|
[1, text.lineEnd],
|
||||||
|
[1, text.token],
|
||||||
|
[3, text.layoutCharCodes],
|
||||||
|
[3, text.bidiCharCodes],
|
||||||
|
[3, text.unicodeCombiningCharacter],
|
||||||
|
[3, text.unicodeBMP],
|
||||||
|
[3, text.unicodeSMP],
|
||||||
|
[3, text.unicodeSIP],
|
||||||
|
[3, text.unicodeSSP]
|
||||||
])
|
])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a single character
|
||||||
|
*/
|
||||||
|
static character () {
|
||||||
|
return text.random().charAt(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate string comprised of random generators
|
||||||
|
*/
|
||||||
static any () {
|
static any () {
|
||||||
// Generate a string composed of random individual characters
|
|
||||||
// This might be too slow to be used for all 'texts' uses
|
|
||||||
let s = ''
|
let s = ''
|
||||||
// TODO: Length calculation taken from DOMFuzz - maybe we should revise this?
|
let len = random.range(1, 126)
|
||||||
let len = random.pick([make.number.tiny, make.number.range])
|
|
||||||
for (let i = 0; i < len; i++) {
|
while (len--) {
|
||||||
s += make.text.chars()
|
s += make.text.random()
|
||||||
}
|
}
|
||||||
|
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue