1 |
On Sat, Jun 07, 2014 at 12:03:29AM +0300, Matti Nykyri wrote: |
2 |
> On Thu, Jun 05, 2014 at 10:58:51PM -0500, Canek Peláez Valdés wrote: |
3 |
> > On Thu, Jun 5, 2014 at 9:56 PM, <meino.cramer@×××.de> wrote: |
4 |
> > > Hi, |
5 |
> > > |
6 |
> > > I am experimenting with the C code of the ISAAC pseudo random number generator |
7 |
> > > (http://burtleburtle.net/bob/rand/isaacafa.html). |
8 |
> > > |
9 |
> > > Currently the implementation creates (on my embedded linux) 32 bit |
10 |
> > > hexadecimal output. |
11 |
> > |
12 |
> > So it's a 32 bit integer. |
13 |
> > |
14 |
> > > From this I want to create random numbers in the range of [a-Za-z0-9] |
15 |
> > > *without violating randomness* and (if possible) without throwing |
16 |
> > > away bits of the output. |
17 |
> > |
18 |
> > You mean *characters* int the range [A-Za-z0-9]? |
19 |
> |
20 |
> Well this isn't as simple problem as it sounds. A random 32 bit integer |
21 |
> has 32 bits of randomness. If you take a divison reminder of 62 from this |
22 |
> integer you will get only 5,95419631039 bits of randomness |
23 |
> (log(62)/log(2)). So you are wasting 81,4% of your random data. Which is |
24 |
> quite much and usually random data is quite expensive. You can save your |
25 |
> precious random data by taking only 6 bit from your 32 bit integer and |
26 |
> dividing it by 62. Then you will be wasting only 0,8% of random data. |
27 |
> Another problem is alignment, but that is about mathematical correctness. |
28 |
> |
29 |
> > > How can I do this mathemtically (in concern of the quality of output) |
30 |
> > > correct? |
31 |
> > |
32 |
> > The easiest thing to do would be: |
33 |
> |
34 |
> The easiest is not mathematically correct though. Random data will stay |
35 |
> random only if you select and modify it so that randomness is preserved. |
36 |
> If you take devison reminder of 62 from 32 bit integer there are 69 273 |
37 |
> 667 possibilities of the reminder to be 3 or less. For the reminder to 4 |
38 |
> or more the number of possibilities is 69 273 666. In mathematically |
39 |
> ideal case the probability for every index of the list should be same: |
40 |
> 1/62 = 1,61290322581%. But the modulo 62 modifies this probability: for |
41 |
> index 0-3 the probability is 69 273 667/2^32 = 1,61290324759%. And for |
42 |
> indexes 4-61 the probability will be 69 273 666/2^32 = 1,6129032243%. |
43 |
> |
44 |
> If you wish not to waste those random bits the probabilities will get |
45 |
> worse. With 6 bits of random the probability for index 0-1 will be 2/64 |
46 |
> and for 2-63 it will be 1/64. This is a very significant change because |
47 |
> first and second index will appear twice as much as the rest. If you add |
48 |
> 2 characters to your list you will perfect alignment and you can take 6 |
49 |
> bits of data without it modifying probabilities. |
50 |
> |
51 |
> If you are looking a mathematically perfect solution there is a simple |
52 |
> one even if your list is not in the power of 2! Take 6 bits at a time of |
53 |
> the random data. If the result is 62 or 63 you will discard the data and |
54 |
> get the next 6 bits. This selectively modifies the random data but keeps |
55 |
> the probabilities in correct balance. Now the probability for index of |
56 |
> 0-61 is 1/62 because the probability to get 62-63 out of 64 if 0. |
57 |
> |
58 |
> > ------------------------------------------------------------------------------- |
59 |
> > #include <time.h> |
60 |
> > #include <stdio.h> |
61 |
> > #include <stdlib.h> |
62 |
> > |
63 |
> > #define N (26+26+10) |
64 |
> > |
65 |
> > static char S[] = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', |
66 |
> > 'K', 'L', 'M', |
67 |
> > 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', |
68 |
> > 'X', 'Y', 'Z', |
69 |
> > 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', |
70 |
> > 'k', 'l', 'm', |
71 |
> > 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', |
72 |
> > 'x', 'y', 'z', |
73 |
> > '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }; |
74 |
> > |
75 |
> > int |
76 |
> > next_character() |
77 |
> > { |
78 |
> > // Use the correct call for ISAAC instead of rand() |
79 |
> > unsigned int idx = rand() % N; |
80 |
> > return S[idx]; |
81 |
> > } |
82 |
> |
83 |
> so modify the next_char function: |
84 |
> |
85 |
> char next_character() |
86 |
> { |
87 |
> static unsigned int rand = 0; //(sizeof(int) = 32) |
88 |
> static char bit_avail = 0; |
89 |
> char result = 0; |
90 |
> char move_bits = 0; |
91 |
> char bits_moved = 0; |
92 |
> |
93 |
> do { |
94 |
> if (!bits_avail) { |
95 |
> // Use the correct call for ISAAC instead of rand() |
96 |
> rand = rand(); |
97 |
> |
98 |
> bit_avail = 32; |
99 |
> } |
100 |
> |
101 |
> move_bits = bits_avail >= 6 ? 6 : bits_avail; |
102 |
> result <<= move_bits; |
103 |
> result = (result | rand & (0xFF >> (8 - move_bits))) & 0x3F; |
104 |
> bits_avail -= move_bits; |
105 |
> bits_moved += move_bits; |
106 |
> rand >>= move_bits; |
107 |
> |
108 |
> } while (bits_moved != 6 && result > 61); |
109 |
> |
110 |
> return result; |
111 |
> } |
112 |
|
113 |
Well actually it looks simpler if you break this like this: |
114 |
|
115 |
unsigned char get_6bits () |
116 |
{ |
117 |
static unsigned int rand = 0; //(sizeof(int) = 32) |
118 |
static char bits_avail = 0; |
119 |
unsigned char result = 0; |
120 |
|
121 |
//get 2 bits 3 times: 32 is devidable by 2 |
122 |
for (int i = 0; i < 3; i++) { // --std=c99 |
123 |
//Fill buffer if it is empty! |
124 |
if (!bits_avail || bits_avail < 0 ) { //if bits_avail < 0 it is an error! |
125 |
// Use the correct call for ISAAC instead of rand() |
126 |
rand = rand(); |
127 |
|
128 |
bits_avail = 32; |
129 |
} |
130 |
|
131 |
result <<= 2; //move two bits to left. |
132 |
result = result | (rand & 0x3); //add two least signifigant bits to the result. |
133 |
rand >>= 2; //move two bits to right. |
134 |
bits_avail -= 2; |
135 |
} |
136 |
|
137 |
return result; //result has 6 bits of random data... |
138 |
} |
139 |
|
140 |
char next_character() |
141 |
{ |
142 |
unsigned char idx = 0; |
143 |
do { |
144 |
idx = get_6bits(); |
145 |
} while (idx > 61); |
146 |
|
147 |
return S[idx]; |
148 |
} |
149 |
|
150 |
Very simple :) |
151 |
|
152 |
> This function will give perfect distribution of 1/62 probability for |
153 |
> every index. It will waste 6 bits with the probability of 1/32 (2/64). |
154 |
> |
155 |
> > int |
156 |
> > main(int argc, char* argv[]) |
157 |
> > { |
158 |
> > // Use the correct call for initializing the ISAAC seed |
159 |
> > srand((unsigned int)time(NULL)); |
160 |
> > for (int i = 0; i < 20; i++) // --std=c99 |
161 |
> > printf("%c\n", next_character()); |
162 |
> > return 0; |
163 |
> > } |
164 |
> > ------------------------------------------------------------------------------- |
165 |
> > |
166 |
> > If the ISAAC RNG has a good distribution, then the next_character() |
167 |
> > function will give a good distribution among the set [A-Za-z0-9]. |
168 |
> > |
169 |
> > Unless I missunderstood what you meant with "create random numbers in |
170 |
> > the range of [a-Za-z0-9]". |
171 |
|
172 |
-- |
173 |
-Matti |