1 |
On Fri, Jun 6, 2014 at 1:39 PM, <meino.cramer@×××.de> wrote: |
2 |
> Canek Peláez Valdés <caneko@×××××.com> [14-06-06 17:36]: |
3 |
>> On Thu, Jun 5, 2014 at 9:56 PM, <meino.cramer@×××.de> wrote: |
4 |
>> > Hi, |
5 |
>> > |
6 |
>> > I am experimenting with the C code of the ISAAC pseudo random number generator |
7 |
>> > (http://burtleburtle.net/bob/rand/isaacafa.html). |
8 |
>> > |
9 |
>> > Currently the implementation creates (on my embedded linux) 32 bit |
10 |
>> > hexadecimal output. |
11 |
>> |
12 |
>> So it's a 32 bit integer. |
13 |
>> |
14 |
>> > From this I want to create random numbers in the range of [a-Za-z0-9] |
15 |
>> > *without violating randomness* and (if possible) without throwing |
16 |
>> > away bits of the output. |
17 |
>> |
18 |
>> You mean *characters* int the range [A-Za-z0-9]? |
19 |
>> |
20 |
>> > How can I do this mathemtically (in concern of the quality of output) |
21 |
>> > correct? |
22 |
>> |
23 |
>> The easiest thing to do would be: |
24 |
>> |
25 |
>> ------------------------------------------------------------------------------- |
26 |
>> #include <time.h> |
27 |
>> #include <stdio.h> |
28 |
>> #include <stdlib.h> |
29 |
>> |
30 |
>> #define N (26+26+10) |
31 |
>> |
32 |
>> static char S[] = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', |
33 |
>> 'K', 'L', 'M', |
34 |
>> 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', |
35 |
>> 'X', 'Y', 'Z', |
36 |
>> 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', |
37 |
>> 'k', 'l', 'm', |
38 |
>> 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', |
39 |
>> 'x', 'y', 'z', |
40 |
>> '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }; |
41 |
>> |
42 |
>> int |
43 |
>> next_character() |
44 |
>> { |
45 |
>> // Use the correct call for ISAAC instead of rand() |
46 |
>> unsigned int idx = rand() % N; |
47 |
>> return S[idx]; |
48 |
>> } |
49 |
>> |
50 |
>> int |
51 |
>> main(int argc, char* argv[]) |
52 |
>> { |
53 |
>> // Use the correct call for initializing the ISAAC seed |
54 |
>> srand((unsigned int)time(NULL)); |
55 |
>> for (int i = 0; i < 20; i++) // --std=c99 |
56 |
>> printf("%c\n", next_character()); |
57 |
>> return 0; |
58 |
>> } |
59 |
>> ------------------------------------------------------------------------------- |
60 |
>> |
61 |
>> If the ISAAC RNG has a good distribution, then the next_character() |
62 |
>> function will give a good distribution among the set [A-Za-z0-9]. |
63 |
>> |
64 |
>> Unless I missunderstood what you meant with "create random numbers in |
65 |
>> the range of [a-Za-z0-9]". |
66 |
>> |
67 |
>> Regards. |
68 |
>> -- |
69 |
>> Canek Peláez Valdés |
70 |
>> Profesor de asignatura, Facultad de Ciencias |
71 |
>> Universidad Nacional Autónoma de México |
72 |
>> |
73 |
> |
74 |
> Hi, |
75 |
> |
76 |
> Thank you very much for the input! :) |
77 |
> |
78 |
> I have a question about the algorithm: |
79 |
> Suppose rand() has an equal distribution of numbers and furthermore |
80 |
> one has a count of 2^32 random numbers listed in numerical sort |
81 |
> order. |
82 |
> In this list each number would appear (nearly) with the same count: 1 |
83 |
> |
84 |
> To get an better imagination of that...suppose the rand() would only |
85 |
> return numbers in the range of 1...12 and the alphabet has only 8 |
86 |
> characters (as 2^32 is not devideable by 62) |
87 |
> |
88 |
> rand(): |
89 |
> 1 2 3 4 5 6 7 8 9 10 11 12 |
90 |
> |
91 |
> rand()%N : rand()%7 |
92 |
> 1 2 3 4 5 6 7 0 1 2 3 4 |
93 |
> |
94 |
> or in other words: An even distribution of numbers of rand() |
95 |
> would result in a unevenly distributed sequence of characters...or? |
96 |
> This would break the quality of ISAACs output. |
97 |
> |
98 |
> I am sure I did something wrong here...but where is the logic trap? |
99 |
|
100 |
In theory, it doesn't matter if the number of random characters you |
101 |
want is not multiple of the size of the set of characters. In |
102 |
practice, it doesn't matter if the number of random characters you |
103 |
want is big enough. |
104 |
|
105 |
Consider the following modification to my program (I changed the order |
106 |
of the array S so it's sorted and I can do binary search): |
107 |
|
108 |
---------------------------------------------------------------------- |
109 |
#include <time.h> |
110 |
#include <stdio.h> |
111 |
#include <stdlib.h> |
112 |
|
113 |
#define N (10+26+26) |
114 |
|
115 |
static char S[] = { |
116 |
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', |
117 |
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', |
118 |
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', |
119 |
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', |
120 |
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z' |
121 |
}; |
122 |
|
123 |
static int |
124 |
index_of_aux(char c, int a, int b) |
125 |
{ |
126 |
if (a > b) |
127 |
return -1; |
128 |
int m = (a + b) / 2; |
129 |
if (S[m] == c) |
130 |
return m; |
131 |
if (S[m] > c) |
132 |
return index_of_aux(c, a, m-1); |
133 |
return index_of_aux(c, m+1, b); |
134 |
} |
135 |
|
136 |
static int |
137 |
index_of(char c) |
138 |
{ |
139 |
return index_of_aux(c, 0, N-1); |
140 |
} |
141 |
|
142 |
int |
143 |
next_character() |
144 |
{ |
145 |
// Use the correct call for isaac instead of rand() |
146 |
unsigned int idx = rand() % N; |
147 |
return S[idx]; |
148 |
} |
149 |
|
150 |
int |
151 |
main(int argc, char* argv[]) |
152 |
{ |
153 |
srand((unsigned int)time(NULL)); |
154 |
int total = 1000000; |
155 |
|
156 |
// Size = 62 |
157 |
int count[] = { |
158 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
159 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
160 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
161 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
162 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
163 |
}; |
164 |
|
165 |
for (int i = 0; i < total; i++) { |
166 |
char c = next_character(); |
167 |
count[index_of(c)]++; |
168 |
} |
169 |
|
170 |
int min = total, max = 0; |
171 |
for (int i = 0; i < N; i++) { |
172 |
printf("Count %d: %d\n", i, count[i]); |
173 |
min = (count[i] < min) ? count[i] : min; |
174 |
max = (count[i] > max) ? count[i] : max; |
175 |
} |
176 |
printf("Min: %d\n", min); |
177 |
printf("Max: %d\n", max); |
178 |
printf("Diff: %d\n", (max-min)); |
179 |
return 0; |
180 |
} |
181 |
---------------------------------------------------------------------- |
182 |
|
183 |
The output is the following: |
184 |
|
185 |
Count 0: 16060 |
186 |
Count 1: 16102 |
187 |
Count 2: 15968 |
188 |
Count 3: 16075 |
189 |
... |
190 |
Count 59: 16220 |
191 |
Count 60: 16143 |
192 |
Count 61: 16177 |
193 |
Min: 15819 |
194 |
Max: 16366 |
195 |
Diff: 547 |
196 |
|
197 |
As you can see, all the characters appear around 16,000 times, so |
198 |
it's pretty well distributed. If the RNG has a good distribution, it |
199 |
will *always* look something like this. The difference will *never* be |
200 |
0, since no RNG is perfectly distributed. |
201 |
|
202 |
So, TL;DR: it doesn't matter if total is not multiple of N. |
203 |
|
204 |
Regards. |
205 |
-- |
206 |
Canek Peláez Valdés |
207 |
Profesor de asignatura, Facultad de Ciencias |
208 |
Universidad Nacional Autónoma de México |