Navigation menu
Personal tools
Not logged in
Talk
Contributions
Create account
Log in
Namespaces
WoW
Talk
English
Views
Read
Edit
History
More
Search
Navigation
Home
Random page
Help using wiki
Editions
for WoW
for WildStar
for Solar2D
Documentation
for WoW
for WildStar
Reference
WoW
⦁ FrameXML
⦁ AddOns
⦁ API
⦁ WoW Lua
WildStar
⦁ AddOns
⦁ API
⦁ WildStar Lua
Engine
Tools
What links here
Related changes
Special pages
Page information
Site
Recent Changes
Editing
WoW:StringHash/Analysis
(section)
Jump to navigation
Jump to search
Warning:
You are not logged in. Your IP address will be publicly visible if you make any edits. If you
log in
or
create an account
, your edits will be attributed to your username, along with other benefits.
Anti-spam check. Do
not
fill this in!
= Appendix: Lua hash analysis code = <div style="margin-left: 3%;"> <div style="max-width: 80em; margin-right: 2em; height: 20em; overflow: scroll;"> mod=math.mod; -- This is the "Java hash", a variant of Dan Bernstein's old "33" hash posted long ago in the comp.lang.c newsgroup. -- The *31 basically translates to "new = old<<5 - old", i.e. you never completely lose data from earlier bytes as you would by only shifting -- The mod is necessary in Lua; otherwise the value turns floating-point and you lose precision after ~10 characters -- This function would be better with a prime modulus, but 2^32 is chosen as a mod to emulate the 32-bit math used in the original function JavaHash(text) local counter = 0; local len = string.len(text); for i = 1, len do counter = (counter*31) + string.byte(text, i) counter = mod(counter, 4294967296); -- 2^32 end; return mod(counter, 4294967296); -- 2^32 end -- This string hash is ~30% faster due to working with 3 characters at a time; the mod expression is fairly expensive -- The multiplications are all chosen to affect 3 areas of the accumulator at a time: -- 8161 = 8192 - 32 + 1 : 2<<13, 2<< 5, 2<<0 -- 16776193 = 16777216 - 1024 + 1 : 2<<24, 2<<10, 2<<0 -- 8372226 = 8388608 - 16384 + 2 : 2<<23, 2<<14, 2<<5 -- 3932164 = 4194304 - 262144 + 4 : 2<<22, 2<<18, 2<<10 function StringHash(text) local counter = 1; local len = string.len(text); for i = 1, len, 3 do counter = mod(counter*8161, 4294967279) + -- 2^32 - 17: Prime! (counter*8161 is at most a 48-bit number, which easily fits in the 52-bit mantissa of a double precision float) (string.byte(text,i)*16776193) + ((string.byte(text,i+1) or (len-i+256))*8372226) + ((string.byte(text,i+2) or (len-i+256))*3932164); end; return mod(counter, 4294967291); -- 2^32 - 5: Prime (and different from the prime in the loop) end -- Utility function: return number of bits changed between a and b function bitsdiff(a,b) local n=0; for i=1,32 do if(mod(a,2) ~= mod(b,2)) then n=n+1; end a=math.floor(a/2); b=math.floor(b/2); end return n; end function tcopy(to,from) for k,v in pairs(from) do to[k]=v; end end -- Bit transmutation test: Count how many bits change as a result of a single bit of input flipping function transtest(func,num, len, first, last) local bitsdiffhits = {} local origstrbytes = { }; for c=1,len do table.insert(origstrbytes, 0); end math.randomseed(2); for rep=1,num do local strbytes = {}; for c=first,last do tcopy(strbytes, origstrbytes); local bitval=1; for b=1,8 do local before = func(string.char(unpack(strbytes))); strbytes[c] = origstrbytes[c]; strbytes[c] = math.mod(origstrbytes[c] + bitval, 256); local after = func(string.char(unpack(strbytes))); local n = bitsdiff(before,after); bitsdiffhits[n] = (bitsdiffhits[n] or 0) + 1; bitval=bitval*2; end end for c=1,table.getn(origstrbytes) do origstrbytes[c]=math.random(0,255); end end local score=0; for i=0,32 do score = score - (bitsdiffhits[i] or 0)*(math.abs(16-i)/16); -- 16 = 50% of the bits, which is how many bits we want to change in a perfect distribution print(string.format(" %2u: %5u", i, bitsdiffhits[i] or 0)); end print(" SCORE: "..score); -- 0 is unimaginatively good. negative is bad. end -- Performance test: Just measure the time it takes to execute a hash on a fixed string lots of times (including the cost of the loop, obviously) function perftest(func, num) local x=os.clock(); for i=1,num do func("abcdefghijklmnopqrstuvwxyz"); end return os.clock()-x; end -- Count the number of single, double, triple etc collisions given a certain manipulation of the resultant hash: -- div by X, take modulus Y -- The modulus is there to create an artificial range clamping; testing would take too long otherwise. -- Should match expected Poisson distribution closely for an unbiased hash. function collisiontest(func, num, div,modulus, randfunc) local x = os.clock(); local outputs = {} local collisions = {} math.randomseed(1); for i=1,num do str = randfunc(); local n = math.mod(math.floor(func(str)/div), modulus); outputs[n] = (outputs[n] or 0)+1; end for _,n in pairs(outputs) do collisions[n] = (collisions[n] or 0) + n; end local y = os.clock(); local sortcol = {} for k,v in pairs(collisions) do table.insert(sortcol, k); end table.sort(sortcol); -- Compute expected Poisson distribution expect = {} lambda = num/modulus; fly = lambda * math.exp(-lambda); for c=1,10 do expect[c] = fly * modulus; fly = fly * lambda / c; end -- Output results and expected results for _,k in ipairs(sortcol) do print(string.format(" %2u: %5u (expected %5u)", k, collisions[k], expect[k] or -1)); end return y-x; end -- Bit distribution test: Measure number of times each bit is set, should average out to 50% chance in the end -- Will be presented as the absolute difference from 50% function bitdistribtest(func,num,lo,hi,dispmin) math.randomseed(1); local bitsset={}; for i=1,num do str = string.char(math.random(lo,hi),math.random(lo,hi),math.random(lo,hi),math.random(lo,hi),math.random(lo,hi),math.random(lo,hi), math.random(lo,hi),math.random(lo,hi),math.random(lo,hi),math.random(lo,hi),math.random(lo,hi),math.random(lo,hi), math.random(lo,hi),math.random(lo,hi),math.random(lo,hi),math.random(lo,hi),math.random(lo,hi),math.random(lo,hi)); local n = func(str); for k=1,32 do if(mod(n,2)==1) then bitsset[k] = (bitsset[k] or 0)+1; end n=math.floor(n/2); end end for k=1,32 do local diff = math.abs(50-(bitsset[k] or 0)/num*100); if(diff > dispmin) then print(string.format(" %2u: %.1f%%", k, diff)); end end end -- Collision finder test: Loop all possible values in given set and count collisions (yes, this is slow, and uses quite a bit of ram) function collisionfinder(func, lo, hi, bytes, prefix, suffix) local str; local coll={}; prefix = prefix or ""; suffix = suffix or ""; if(bytes==2) then for b=lo,hi do for a=lo,hi do str = prefix .. string.char(a,b) .. suffix; local n = func(str); coll[n] = (coll[n] or 0) + 1; end end elseif(bytes==3) then for c=lo,hi do for b=lo,hi do for a=lo,hi do str = prefix .. string.char(a,b,c) .. suffix; local n = func(str); coll[n] = (coll[n] or 0) + 1; end end end elseif(bytes==4) then for d=lo,hi do for c=lo,hi do for b=lo,hi do for a=lo,hi do str = prefix .. string.char(a,b,c,d) .. suffix; local n = func(str); coll[n] = (coll[n] or 0) + 1; end end end end elseif(bytes==5) then for e=lo,hi do for d=lo,hi do for c=lo,hi do for b=lo,hi do for a=lo,hi do str = prefix .. string.char(a,b,c,d,e) .. suffix; local n = func(str); coll[n] = (coll[n] or 0) + 1; end end end end end end local collcnt = {}; for k,n in coll do collcnt[n] = (collcnt[n] or 0) + 1; end for colls, count in pairs(collcnt) do if(colls==1) then print(string.format(" %5u non-colliding values", count)); else print(string.format(" %5u instances of %u inputs producing the same hash (%u strings)", count, colls, count*colls)); end end end --------------------------------------------------------------------------- -- Run the tests if(true) then local colls; local function longrand(lo,hi) return function() local ret = ""; while colls[ret] do ret = "aaaaaaaaa"..string.char(math.random(lo,hi),math.random(lo,hi),math.random(lo,hi),math.random(lo,hi),math.random(lo,hi),math.random(lo,hi), math.random(lo,hi),math.random(lo,hi),math.random(lo,hi),math.random(lo,hi),math.random(lo,hi),math.random(lo,hi), math.random(lo,hi),math.random(lo,hi),math.random(lo,hi),math.random(lo,hi),math.random(lo,hi),math.random(lo,hi)); end colls[ret] = true; return ret; end end print ""; print ""; print "Collision test - long random string 32-127 (20000 values into 32768 buckets)"; print "------------------------------------------" print("JavaHash:"); colls = {}; print(collisiontest(JavaHash, 20000, 1,32768, longrand(32,127))); print("StringHash:"); colls = {}; print(collisiontest(StringHash, 20000, 1,32768, longrand(32,127))); print ""; print "Collision test - long random string 0-255 (20000 values into 32768 buckets, bits 1-15)"; print "------------------------------------------" print("JavaHash:"); colls = {}; print(collisiontest(JavaHash, 20000, 1,32768, longrand(0,255))); print("StringHash:"); colls = {}; print(collisiontest(StringHash, 20000, 1,32768, longrand(0,255))); print ""; print "Collision test - long random string 0-255 (20000 values into 32768 buckets, bits 6-20)"; print "------------------------------------------" print("JavaHash:"); colls = {}; print(collisiontest(JavaHash, 20000, math.pow(2,5),32768, longrand(0,255))); print("StringHash:"); colls = {}; print(collisiontest(StringHash, 20000, math.pow(2,5),32768, longrand(0,255))); print ""; print "Collision test - long random string 0-255 (20000 values into 32768 buckets, bits 12-26)"; print "------------------------------------------" print("JavaHash:"); colls = {}; print(collisiontest(JavaHash, 20000, math.pow(2,11),32768, longrand(0,255))); print("StringHash:"); colls = {}; print(collisiontest(StringHash, 20000, math.pow(2,11),32768, longrand(0,255))); print ""; print "Collision test - long random string 0-255 (20000 values into 32768 buckets, bits 18-32)"; print "------------------------------------------" print("JavaHash:"); colls = {}; print(collisiontest(JavaHash, 20000, math.pow(2,17),32768, longrand(0,255))); print("StringHash:"); colls = {}; print(collisiontest(StringHash, 20000, math.pow(2,17),32768, longrand(0,255))); end if(true) then print ""; print ""; print "Performance test"; print "-----------------" print("JavaHash speed: ", perftest(JavaHash, 20000)); print("StringHash speed: ", perftest(StringHash, 20000)); end if(true) then print ""; print ""; print "Bit transmutation test (how many bits change if a single bit changes?)"; print "----------------------"; print(""); print("JavaHash 13 bytes total, change 1-13:"); transtest(JavaHash,50, 13, 1,13); print(""); print("StringHash 13 bytes total, change 1-13:"); transtest(StringHash,50, 13, 1,13); print(""); print("JavaHash 13 bytes total, change 1-2:"); transtest(JavaHash,50, 13, 1,2); print(""); print("StringHash 13 bytes total, change 1-2:"); transtest(StringHash,50, 13, 1,2); print(""); print("JavaHash 13 bytes total, change 12-13:"); transtest(JavaHash,50, 13, 12,13); print(""); print("StringHash 13 bytes total, change 12-13:"); transtest(StringHash,50, 13, 12,13); end if(true) then print ""; print ""; print "Bit distribution test (what's the chance of each bit being set? expressed as abs diff from 50%)"; print "---------------------"; print(""); print("JavaHash 0-255, 10000 loops: (showing diffs>0.4%)"); bitdistribtest(JavaHash,10000,0,255, 0.4); print(""); print("StringHash 0-255, 10000 loops: (showing diffs>0.4%)"); bitdistribtest(StringHash,10000,0,255, 0.4); print(""); print("JavaHash 32-127, 10000 loops: (showing diffs>0.4%)"); bitdistribtest(JavaHash,10000,32,127, 0.4); print(""); print("StringHash 32-127, 10000 loops: (showing diffs>0.4%)"); bitdistribtest(StringHash,10000,32,127, 0.4); end if(true) then print ""; print ""; print "Collision finder (run through all possible values of a set and count collisions)"; print "----------------"; print(""); print("JavaHash, 64-89 x 3 bytes"); collisionfinder(JavaHash, 64, 89, 3); print(""); print("StringHash, 64-89 x 3 bytes"); collisionfinder(StringHash, 64, 89, 3); print(""); print(""); print("JavaHash, 64-89 x 4 bytes"); collisionfinder(JavaHash, 64, 89, 4); print(""); print("StringHash, 64-89 x 4 bytes"); collisionfinder(StringHash, 64, 89, 4); print(""); print(""); print("JavaHash, 32-127 x 2 bytes"); collisionfinder(JavaHash, 32, 127, 2); print(""); print("StringHash, 32-127 x 2 bytes"); collisionfinder(StringHash, 32, 127, 2); print(""); print(""); print("JavaHash, 32-127 x 3 bytes"); collisionfinder(JavaHash, 32, 127, 3); print(""); print("StringHash, 32-127 x 3 bytes"); collisionfinder(StringHash, 32, 127, 3); print(""); print(""); print("JavaHash, 32-127 x 3 bytes, prefix '01234567890123456789'"); collisionfinder(JavaHash, 32, 127, 3, "01234567890123456789"); print(""); print("StringHash, 32-127 x 3 bytes, prefix '01234567890123456789'"); collisionfinder(StringHash, 32, 127, 3, "01234567890123456789"); print(""); print(""); print("JavaHash, 32-127 x 3 bytes, suffix '01234567890123456789'"); collisionfinder(JavaHash, 32, 127, 3, "", "01234567890123456789"); print(""); print("StringHash, 32-127 x 3 bytes, suffix '01234567890123456789'"); collisionfinder(StringHash, 32, 127, 3, "", "01234567890123456789"); print(""); -- This test takes a looooong time if(true) then print(""); print("JavaHash, 64-82 x 5 bytes"); collisionfinder(JavaHash, 64, 82, 5); print(""); print("StringHash, 64-82 x 5 bytes"); collisionfinder(StringHash, 64, 82, 5); end end </div> </div> [[Category:User defined functions]]
Summary:
Please note that all contributions to AddOn Studio are considered to be released under the Creative Commons Attribution-NonCommercial-ShareAlike (see
AddOn Studio Wiki:Copyrights
for details).
Submissions must be written by you, or copied from a public domain or similar free resource (see
AddOn Studio Wiki:Copyrights
for details).
Cancel
Editing help
(opens in new window)