blob: 55103453a3d2647957c3c3a2cf64f0405aab2bac [file] [log] [blame]
# Licensed under the Mozilla Public Licence 2.0.
# https://www.mozilla.org/en-US/MPL/2.0
import uuid
import slugid
def testEncode():
""" Test that we can correctly encode a "non-nice" uuid (with first bit
set) to its known slug. The specific uuid was chosen since it has a slug
which contains both `-` and `_` characters."""
# 10000000010011110011111111001000110111111100101101001011000001101000100111111011101011101111101011010101111000011000011101010100....
# <8 ><0 ><4 ><f ><3 ><f ><c ><8 ><d ><f ><c ><b ><4 ><b ><0 ><6 ><8 ><9 ><f ><b ><a ><e ><f ><a ><d ><5 ><e ><1 ><8 ><7 ><5 ><4 >
# < g >< E >< 8 >< _ >< y >< N >< _ >< L >< S >< w >< a >< J >< - >< 6 >< 7 >< 6 >< 1 >< e >< G >< H >< V >< A >
uuid_ = uuid.UUID('{804f3fc8-dfcb-4b06-89fb-aefad5e18754}')
expectedSlug = 'gE8_yN_LSwaJ-6761eGHVA'
actualSlug = slugid.encode(uuid_)
assert expectedSlug == actualSlug, "UUID not correctly encoded into slug: '" + expectedSlug + "' != '" + actualSlug + "'"
def testDecode():
""" Test that we can decode a "non-nice" slug (first bit of uuid is set)
that begins with `-`"""
# 11111011111011111011111011111011111011111011111001000011111011111011111111111111111111111111111111111111111111111111111111111101....
# <f ><b ><e ><f ><b ><e ><f ><b ><e ><f ><b ><e ><4 ><3 ><e ><f ><b ><f ><f ><f ><f ><f ><f ><f ><f ><f ><f ><f ><f ><f ><f ><d >
# < - >< - >< - >< - >< - >< - >< - >< - >< Q >< - >< - >< - >< _ >< _ >< _ >< _ >< _ >< _ >< _ >< _ >< _ >< Q >
slug = '--------Q--__________Q'
expectedUuid = uuid.UUID('{fbefbefb-efbe-43ef-bfff-fffffffffffd}')
actualUuid = slugid.decode(slug)
assert expectedUuid == actualUuid, "Slug not correctly decoded into uuid: '" + str(expectedUuid) + "' != '" + str(actualUuid) + "'"
def testUuidEncodeDecode():
""" Test that 10000 v4 uuids are unchanged after encoding and then decoding them"""
for i in range(0, 10000):
uuid1 = uuid.uuid4()
slug = slugid.encode(uuid1)
uuid2 = slugid.decode(slug)
assert uuid1 == uuid2, "Encode and decode isn't identity: '" + str(uuid1) + "' != '" + str(uuid2) + "'"
def testSlugDecodeEncode():
""" Test that 10000 v4 slugs are unchanged after decoding and then encoding them."""
for i in range(0, 10000):
slug1 = slugid.v4()
uuid_ = slugid.decode(slug1)
slug2 = slugid.encode(uuid_)
assert slug1 == slug2, "Decode and encode isn't identity"
def testSpreadNice():
""" Make sure that all allowed characters can appear in all allowed
positions within the "nice" slug. In this test we generate over a thousand
slugids, and make sure that every possible allowed character per position
appears at least once in the sample of all slugids generated. We also make
sure that no other characters appear in positions in which they are not
allowed.
base 64 encoding char -> value:
ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_
0 1 2 3 4 5 6
0123456789012345678901234567890123456789012345678901234567890123
e.g. from this we can see 'j' represents 35 in base64
The following comments show the 128 bits of the v4 uuid in binary, hex and
base 64 encodings. The 6 fixed bits (`0`/`1`) according to RFC 4122, plus
the first (most significant) fixed bit (`0`) are shown among the 121
arbitrary value bits (`.`/`x`). The `x` means the same as `.` but just
highlights which bits are grouped together for the respective encoding.
schema:
<..........time_low............><...time_mid...><time_hi_+_vers><clk_hi><clk_lo><.....................node.....................>
bin: 0xxx............................................0100............10xx............................................................
hex: $A <01><02><03><04><05><06><07><08><09><10><11> 4 <13><14><15> $B <17><18><19><20><21><22><23><24><25><26><27><28><29><30><31>
=> $A in {0, 1, 2, 3, 4, 5, 6, 7} (0b0xxx)
=> $B in {8, 9, A, B} (0b10xx)
bin: 0xxxxx..........................................0100xx......xxxx10............................................................xx0000
b64: $C < 01 >< 02 >< 03 >< 04 >< 05 >< 06 >< 07 > $D < 09 > $E < 11 >< 12 >< 13 >< 14 >< 15 >< 16 >< 17 >< 18 >< 19 >< 20 > $F
=> $C in {A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z, a, b, c, d, e, f} (0b0xxxxx)
=> $D in {Q, R, S, T} (0b0100xx)
=> $E in {C, G, K, O, S, W, a, e, i, m, q, u, y, 2, 6, -} (0bxxxx10)
=> $F in {A, Q, g, w} (0bxx0000)"""
charsAll = ''.join(sorted('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'))
# 0 - 31: 0b0xxxxx
charsC = ''.join(sorted('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef'))
# 16, 17, 18, 19: 0b0100xx
charsD = ''.join(sorted('QRST'))
# 2, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62: 0bxxxx10
charsE = ''.join(sorted('CGKOSWaeimquy26-'))
# 0, 16, 32, 48: 0bxx0000
charsF = ''.join(sorted('AQgw'))
expected = [charsC, charsAll, charsAll, charsAll, charsAll, charsAll, charsAll, charsAll, charsD, charsAll, charsE, charsAll, charsAll, charsAll, charsAll, charsAll, charsAll, charsAll, charsAll, charsAll, charsAll, charsF]
spreadTest(slugid.nice, expected)
def testSpreadV4():
""" This test is the same as niceSpreadTest but for slugid.v4() rather than
slugid.nice(). The only difference is that a v4() slug can start with any of
the base64 characters since the first six bits of the uuid are random."""
charsAll = ''.join(sorted('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'))
# 16, 17, 18, 19: 0b0100xx
charsD = ''.join(sorted('QRST'))
# 2, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62: 0bxxxx10
charsE = ''.join(sorted('CGKOSWaeimquy26-'))
# 0, 16, 32, 48: 0bxx0000
charsF = ''.join(sorted('AQgw'))
expected = [charsAll, charsAll, charsAll, charsAll, charsAll, charsAll, charsAll, charsAll, charsD, charsAll, charsE, charsAll, charsAll, charsAll, charsAll, charsAll, charsAll, charsAll, charsAll, charsAll, charsAll, charsF]
spreadTest(slugid.v4, expected)
def spreadTest(generator, expected):
""" `spreadTest` runs a test against the `generator` function, to check that
when calling it 64*40 times, the range of characters per string position it
returns matches the array `expected`, where each entry in `expected` is a
string of all possible characters that should appear in that position in the
string, at least once in the sample of 64*40 responses from the `generator`
function"""
# k is an array which stores which characters were found at which
# positions. It has one entry per slugid character, therefore 22 entries.
# Each entry is a dict with a key for each character found, and its value
# as the number of times that character appeared at that position in the
# slugid in the large sample of slugids generated in this test.
k = [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}]
# Generate a large sample of slugids, and record what characters appeared
# where... A monte-carlo test has demonstrated that with 64 * 20
# iterations, no failure occurred in 1000 simulations, so 64 * 40 should be
# suitably large to rule out false positives.
for i in range(0, 64 * 40):
slug = generator()
assert len(slug) == 22
for j in range(0, 22):
if slug[j] in k[j]:
k[j][slug[j]] = k[j][slug[j]] + 1
else:
k[j][slug[j]] = 1
# Compose results into an array `actual`, for comparison with `expected`
actual = []
for j in range(0, len(k)):
actual.append('')
for a in k[j].keys():
if k[j][a] > 0:
actual[j] += a
# sort for easy comparison
actual[j] = ''.join(sorted(actual[j]))
assert arraysEqual(expected, actual), "In a large sample of generated slugids, the range of characters found per character position in the sample did not match expected results.\n\nExpected: " + str(expected) + "\n\nActual: " + str(actual)
def arraysEqual(a, b):
""" returns True if arrays a and b are equal"""
return cmp(a, b) == 0