Gentoo Archives: gentoo-commits

From: Kent Fredric <kentfredric@×××××.com>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/perl-overlay:master commit in: scripts/
Date: Wed, 18 Apr 2012 03:32:51
Message-Id: 1334719551.b2800087a6719b8b9df1732d7ecdac3f5fab8b06.kent@gentoo
1 commit: b2800087a6719b8b9df1732d7ecdac3f5fab8b06
2 Author: Kent Fredric <kentfredric <AT> gmail <DOT> com>
3 AuthorDate: Wed Apr 18 03:25:51 2012 +0000
4 Commit: Kent Fredric <kentfredric <AT> gmail <DOT> com>
5 CommitDate: Wed Apr 18 03:25:51 2012 +0000
6 URL: http://git.overlays.gentoo.org/gitweb/?p=proj/perl-overlay.git;a=commit;h=b2800087
7
8 [scripts/package_map_all.pl] hacks to get around the abysmal speed I experienced today with the API, request batching and ssl stuff
9
10 ---
11 scripts/package_map_all.pl | 120 +++++++++++++++++++++++++------------------
12 1 files changed, 70 insertions(+), 50 deletions(-)
13
14 diff --git a/scripts/package_map_all.pl b/scripts/package_map_all.pl
15 index 351cd63..8bb260e 100755
16 --- a/scripts/package_map_all.pl
17 +++ b/scripts/package_map_all.pl
18 @@ -28,7 +28,8 @@ if ( $optparse->has_long_opt('root') ) {
19 $root = Path::Class::Dir->new( $optparse->long_opt('root') );
20 }
21
22 -my $size = 1000;
23 +my $size = 500;
24 +my $scroll_time = '20m';
25
26 my $metadata = $root->subdir( 'metadata', 'perl' );
27 my $distmap = $metadata->subdir('distmap');
28 @@ -52,7 +53,7 @@ my %g_repos;
29 for ( keys %{$nodes} ) {
30 my $records = $nodes->{$_};
31 $lookup{$_}++;
32 - for my $rec ( @{ $records }) {
33 + for my $rec ( @{$records} ) {
34 my $repo = $rec->{repository};
35 $repos{$repo}++;
36 }
37 @@ -72,58 +73,17 @@ my %g_repos;
38
39 my @dists = keys %lookup;
40
41 -my $search = {};
42 -$search->{query} = { constant_score => { filter => { terms => { distribution => [@dists] } } } };
43 -$search->{sort} = [ { 'date' => 'desc', }, ];
44 -$search->{size} = $size;
45 -$search->{fields} = [
46 - qw(
47 - abstract
48 - archive
49 - author
50 - authorized
51 - date
52 - distribution
53 - download_url
54 - license
55 - maturity
56 - name
57 - status
58 - version
59 - )
60 -];
61 -
62 -$ENV{WWW_MECH_NOCACHE} = 1;
63 -
64 -my $results_string = mcpan->ua->request(
65 - 'POST',
66 - mcpan->base_url . 'release/_search?search_type=scan&scroll=30s&size=' . $size,
67 - {
68 - headers => { 'Accept-Encoding' => 'gzip', },
69 - content => $encoder->encode($search),
70 - }
71 -);
72 -
73 -say $results_string->{content};
74 -
75 -my $results = $decoder->decode( $results_string->{content} );
76 -my $scroll_id = $results->{_scroll_id};
77 +my $dtree;
78
79 -my $total_results = $results->{hits}->{total};
80 +my $seen = 0;
81
82 -say "Found: $total_results releases";
83 +use List::MoreUtils qw( natatime );
84
85 -my $dtree;
86 -my $seen = 0;
87 +my $it = natatime 500, @dists;
88
89 -while (1) {
90 - my ( $result, $scroll ) = scroll($scroll_id);
91 - last unless scalar @{ $result->{hits}->{hits} };
92 - collate_resultset($result);
93 - $scroll_id = $scroll;
94 - say "Seen $seen of $total_results";
95 +while ( my @dists_batch = $it->() ) {
96 + get_data_for(@dists_batch);
97 }
98 -
99 for my $package ( sort keys %{$dtree} ) {
100 say "Sorting $package";
101 $dtree->{$package} = [ sort { $b->{date} cmp $a->{date} } @{ $dtree->{$package} } ];
102 @@ -134,14 +94,74 @@ $fh->print( $encoder->encode($dtree) );
103
104 exit 0;
105
106 +sub get_data_for {
107 + my (@items) = @_;
108 + my $search = {};
109 + $search->{query} = { constant_score => { filter => { terms => { distribution => [@items] } } } };
110 + $search->{sort} = [ { 'date' => 'desc', }, ];
111 + $search->{size} = $size;
112 + $search->{fields} = [
113 + qw(
114 + abstract
115 + archive
116 + author
117 + authorized
118 + date
119 + distribution
120 + download_url
121 + license
122 + maturity
123 + name
124 + status
125 + version
126 + )
127 + ];
128 +
129 + $ENV{WWW_MECH_NOCACHE} = 1;
130 +
131 + my $results_string = mcpan->ua->request(
132 + 'POST',
133 + 'https://api.metacpan.org/release/_search?search_type=scan&scroll=' . $scroll_time . '&size=' . $size,
134 + {
135 + headers => { 'Accept-Encoding' => 'gzip', },
136 + content => $encoder->encode($search),
137 + }
138 + );
139 +
140 + my $results = $decoder->decode( $results_string->{content} );
141 + my $scroll_id = $results->{_scroll_id};
142 +
143 + my $total_results = $results->{hits}->{total};
144 +
145 + say "Found: $total_results releases";
146 + $seen = 0;
147 + while (1) {
148 + my ( $result, $scroll ) = scroll($scroll_id);
149 + last unless scalar @{ $result->{hits}->{hits} };
150 + collate_resultset($result);
151 + $scroll_id = $scroll;
152 + say "Seen $seen of $total_results";
153 + }
154 +
155 +}
156 +
157 sub scroll {
158 my ($id) = @_;
159 my $result = mcpan->ua->request(
160 'GET',
161 - 'http://api.metacpan.org/_search/scroll/?scroll=30s&size=' . $size . '&scroll_id=' . $id,
162 + 'https://api.metacpan.org/_search/scroll/?scroll=' . $scroll_time . '&size=' . $size . '&scroll_id=' . $id,
163 { headers => { 'Accept-Encoding' => 'gzip', } }
164 );
165
166 + if ( $result->{content} =~ /Server Error/ ) {
167 + require Data::Dump;
168 + Data::Dump::pp( { result => $result, size => $size, scroll_id => $id } );
169 + die;
170 + }
171 + else {
172 + #require Data::Dump;
173 + #Data::Dump::pp( { result => { %{$result}, content => '...' }, size => $size, scroll_id => $id } );
174 + }
175 my $data = $decoder->decode( $result->{content} );
176 return $data, $data->{_scroll_id};
177 }