1 |
On Mon, Jul 27, 2020, at 9:24 PM, Adam Carter wrote: |
2 |
> > Compare realtime to measured CPU time. If one realtime second is shorter than a |
3 |
> > CPU second then you know the host is pausing your VM. There are other ways to |
4 |
> > check, but this should always work if you can contact an asynchronous time standard. |
5 |
> > You may need to average the time over tens of seconds or a minute. |
6 |
> > |
7 |
> > This method will allow you to figure out that AWS spot instances are |
8 |
> > oversubscribed ~1.5x. |
9 |
> > |
10 |
> |
11 |
> Nice. FWIW the guest is running NTP. |
12 |
> |
13 |
> So should I run something like: date ; time <some command that runs at |
14 |
> 100%CPU for a minute> ; date ? |
15 |
|
16 |
No, date will pull from your RTC, which is usually kept up to date with an asynchronous |
17 |
counter. |
18 |
|
19 |
First check GNU top(1) and look in the %Cpu line for "st." That is % CPU time stolen. If it is |
20 |
nonzero then the guest time's accounting is probably working. It's not typical for the |
21 |
hypervisor to hide this information. It's really important for load balancing. |
22 |
|
23 |
If that doesn't work we're going to have to write some C. Look at clock_gettime(3): |
24 |
https://linux.die.net/man/3/clock_gettime. |
25 |
|
26 |
The clocks are performance counters. Usually their only guarantee is that they go up. |
27 |
On some platforms you may be able to see a difference between CLOCK_REALTIME and |
28 |
CLOCK_MONOTONIC. On most platforms however, CLOCK_MONOTONIC is clocked |
29 |
from the CPU timebase and continues to increment when your program is not running. |
30 |
On Windows the API exposes the per-core clocks as well. |
31 |
|
32 |
So to get around this, you need to know the frequency of the processor and how long |
33 |
it takes to execute specific instructions. |
34 |
|
35 |
% time ./stealcheck |
36 |
real 0.680168s |
37 |
expected 0.625681s |
38 |
./stealcheck 0.69s user 0.00s system 98% cpu 0.698 total |
39 |
|
40 |
As commented below, I didn't have time to find the exact cycle count for a busy loop. |
41 |
But six is familiar and these times line up with what `time` gives. The other issue is |
42 |
I haven't implemented CPU pinning nor have I fixed the frequency. |
43 |
|
44 |
If possible, do those; otherwise you can still infer an accurate steal time, it just |
45 |
requires statistics. This will be good enough for a yes/no answer. (I.e. if you |
46 |
get a noticeable discrepancy, buy more hardware.) |
47 |
|
48 |
https://github.com/R030t1/stealcheck |
49 |
|
50 |
g++ -std=gnu++2a -Wall -pedantic \ |
51 |
stealcheck.cc -o stealcheck |
52 |
|
53 |
#include <stdint.h> |
54 |
#include <stdlib.h> |
55 |
#include <stdio.h> |
56 |
#include <time.h> |
57 |
|
58 |
#include <string> |
59 |
#include <regex> |
60 |
#include <iostream> |
61 |
#include <fstream> |
62 |
using namespace std; |
63 |
|
64 |
uint64_t cpufreq();

/*
 * Entry point for stealcheck.
 *
 * Runs a fixed number of busy-loop iterations, measures how long they took
 * on CLOCK_REALTIME ("real"), and prints that next to the duration predicted
 * from the CPU frequency returned by cpufreq() and an assumed cycle cost per
 * iteration ("expected"). Comparing the two numbers is left to the reader.
 *
 * argc/argv are accepted but not used.
 *
 * Returns 0 on success, EXIT_FAILURE if the CPU frequency could not be
 * determined or the clock could not be read.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    // If you have a newer processor you can request
    // cpuid level 0x16. For this impl. libpcre is
    // likely faster.
    const uint64_t cf = cpufreq();
    if (cf == 0) {
        // cpufreq() yields 0 when no "cpu MHz" line was found; dividing by
        // it below would only print "inf", so fail loudly instead.
        fprintf(stderr, "stealcheck: could not determine CPU frequency\n");
        return EXIT_FAILURE;
    }

    // Six is familiar but likely not right.
    const uint64_t cycles_per_loop = 6;
    const uint64_t iterations = 0x10000000;

    struct timespec start = {};
    if (clock_gettime(CLOCK_REALTIME, &start) != 0) {
        perror("clock_gettime");
        return EXIT_FAILURE;
    }

    // Confirm the cycle count of these instructions for
    // accurate results and/or implement loop with asm.
    //
    // The counter is volatile so that every decrement is an observable
    // access: without it an optimizing compiler is free to remove the
    // empty loop entirely and "real" collapses to ~0. The explicit
    // `count = count - 1` avoids `--` on a volatile, which C++20 deprecates.
    volatile uint64_t count = iterations;
    while (count != 0) {
        count = count - 1;
    }

    struct timespec end = {};
    if (clock_gettime(CLOCK_REALTIME, &end) != 0) {
        perror("clock_gettime");
        return EXIT_FAILURE;
    }

    // Calculate delta. The nanosecond difference may be negative when the
    // seconds field rolled over in between; the sum is still correct.
    const double real = (end.tv_sec - start.tv_sec) * 1.0
                      + (end.tv_nsec - start.tv_nsec) / 1000000000.0;
    const double expected = (1.0 / cf) * iterations * cycles_per_loop;
    printf("real\t\t%lfs\n", real);
    printf("expected\t%lfs\n", expected);

    return 0;
}
95 |
|
96 |
/*
 * Reads /proc/cpuinfo and returns a CPU frequency in Hz.
 *
 * Every line matching "cpu MHz ... <number>" is parsed and scaled from MHz
 * to Hz; the value from the last matching line is the one returned. If the
 * file cannot be read or no line matches, the result is 0.
 */
uint64_t cpufreq() {
    const regex mhz_line("^cpu MHz.*?([\\d.]+)");
    ifstream info("/proc/cpuinfo");

    uint64_t hz = 0;
    for (string row; getline(info, row); ) {
        smatch hit;
        if (regex_search(row, hit, mhz_line)) {
            // No CPU pinning is done, so later entries simply overwrite
            // earlier ones; per-core values are typically close together.
            hz = stod(hit[1].str()) * 1000000;
        }
    }

    return hz;
}