Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/sugar_network/node/stats.py
blob: d1b0d63775f4652d32d207a8262fa9fceb3ea350 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
# Copyright (C) 2014 Aleksey Lim
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import os
import time
import logging

from sugar_network.toolkit.rrd import Rrd
from sugar_network.toolkit.router import route, postroute, Request
from sugar_network.toolkit.coroutine import this
from sugar_network.toolkit import Option, coroutine, enforce


# Master switch; statistics are not collected unless it is set
stats = Option(
        'collect unpersonalized node statistics',
        action='store_true', type_cast=Option.bool_cast, default=False)

# Length of one RRD step, five minutes by default
stats_step = Option(
        'step interval in seconds for RRD statistics database',
        type_cast=int, default=5 * 60)

# Default archives; the durations in the comments assume the default
# five-minute step
stats_rras = Option(
        'comma separated list of RRAs for RRD statistics database',
        type_cast=Option.list_cast, type_repr=Option.list_repr,
        default=[
            'RRA:AVERAGE:0.5:1:864',        # 3d with 5min step
            'RRA:AVERAGE:0.5:288:3660',     # 10y with 1d step
            'RRA:AVERAGE:0.5:2880:366',     # 10y with 10d step
            'RRA:AVERAGE:0.5:8640:122',     # 10y with 30d step
            'RRA:AVERAGE:0.5:105120:10',    # 10y with 1y step
            ])

# One year in seconds; used as the heartbeat of every GAUGE data source
_HEARTBEAT_EVER = 60 * 60 * 24 * 365

# Data sources of the statistics database, keyed by field name.
#
# ABSOLUTE fields are reset to zero after every commit. GAUGE fields keep
# a running total; for them, `resource` and `query` tell
# `StatRoutes.stats_init()` which volume directory to query (and how) to
# get the initial value when the database is still empty.
_DS = {
    'contexts': {
        'type': 'GAUGE',
        'heartbeat': _HEARTBEAT_EVER,
        'resource': 'context',
        'query': {},
        },
    'released': {
        'type': 'ABSOLUTE',
        },
    'solved': {
        'type': 'ABSOLUTE',
        },
    'reported': {
        'type': 'ABSOLUTE',
        },
    # Topics and posts are both stored as `post` resources, the same ones
    # `_ROUTES` watches to adjust these two counters, so the initial value
    # has to be counted in the `post` directory
    'topics': {
        'type': 'GAUGE',
        'heartbeat': _HEARTBEAT_EVER,
        'resource': 'post',
        'query': {'topic': ''},
        },
    'posts': {
        'type': 'GAUGE',
        'heartbeat': _HEARTBEAT_EVER,
        'resource': 'post',
        'query': {'not_topic': ''},
        },
    'users': {
        'type': 'GAUGE',
        'heartbeat': _HEARTBEAT_EVER,
        'resource': 'user',
        'query': {},
        },
    }

def _post_stat():
    # A `post` resource with a non-empty `topic` is counted as a post,
    # any other one as a topic
    if this.resource['topic']:
        return 'posts'
    return 'topics'


# Requests that affect statistics.
#
# Keys are (method, resource, prop, cmd) of the request, values are
# (field, shift) pairs, where `field` is either a `_DS` name or a callable
# returning one, and `shift` is the amount added to that field.
_ROUTES = dict((
    # Contexts
    (('POST', 'context', None, None), ('contexts', +1)),
    (('DELETE', 'context', None, None), ('contexts', -1)),
    (('POST', 'context', 'releases', None), ('released', +1)),
    (('GET', 'context', None, 'solve'), ('solved', +1)),
    # Reports
    (('POST', 'report', None, None), ('reported', +1)),
    # Topics and posts
    (('POST', 'post', None, None), (_post_stat, +1)),
    (('DELETE', 'post', None, None), (_post_stat, -1)),
    # Users
    (('POST', 'user', None, None), ('users', +1)),
    (('DELETE', 'user', None, None), ('users', -1)),
    ))

# Upper limit for the number of records one `stats` request may return
_MAX_STAT_RECORDS = 100

_logger = logging.getLogger('node.stats')


class StatRoutes(object):
    """Collect node statistics into an RRD database.

    Counters described by `_DS` are kept in memory, adjusted after every
    successful request listed in `_ROUTES`, and flushed to the database by
    `stats_commit()`. Rating changes for contexts and posts are accumulated
    the same way and applied to the volume on commit.

    Nothing is collected until `stats_init()` was called.

    """

    # `Rrd` database; None while statistics are disabled
    _rrd = None
    # In-memory values keyed by `_DS` field names
    _stats = None
    # Pending rating deltas: {resource: {guid: [number of posts, vote sum]}}
    _rating = None
    # True if `_stats` was changed since the last commit
    _stated = False

    def stats_init(self, path, step, rras):
        """Open the statistics database and load current values.

        If the database returns no values, ABSOLUTE fields start from zero
        and GAUGE fields are seeded by querying the volume.

        :param path:
            database location, passed to `Rrd` as is
        :param step:
            step interval, passed to `Rrd` as is
        :param rras:
            list of RRA definitions, passed to `Rrd` as is

        """
        _logger.info('Collect node stats in %r', path)

        self._rrd = Rrd(path, 'stats', _DS, step, rras)
        self._stats = self._rrd.values()
        self._rating = {'context': {}, 'post': {}}

        if not self._stats:
            for field, traits in _DS.items():
                value = 0
                if traits['type'] == 'GAUGE':
                    directory = this.volume[traits['resource']]
                    # The second value returned by `find()` is presumably
                    # the total number of matches; `limit=0` avoids
                    # fetching the documents themselves
                    __, value = directory.find(limit=0, **traits['query'])
                self._stats[field] = value

    @postroute
    def stat_on_postroute(self, result, exception, stat_rating=True):
        """Update in-memory counters after a request was processed.

        Does nothing if statistics are disabled, the request failed, or
        the request is not listed in `_ROUTES`.

        :param result:
            request result; always returned unchanged
        :param exception:
            exception raised while processing the request, if any
        :param stat_rating:
            if False, do not accumulate rating changes
        :returns:
            `result`

        """
        if self._rrd is None or exception is not None:
            return result

        r = this.request
        route_ = _ROUTES.get((r.method, r.resource, r.prop, r.cmd))
        if route_ is None:
            return result
        stat, shift = route_
        self._stated = True

        # The field name might depend on the resource being processed
        if not isinstance(stat, basestring):
            stat = stat()
        self._stats[stat] += shift

        if stat_rating and r.method == 'POST' and r.resource == 'post':
            rating = None
            if stat == 'topics' and this.resource['type'] == 'review':
                # Reviews affect the rating of the reviewed context
                rating = self._rating['context']
                rating = rating.setdefault(this.resource['context'], [0, 0])
            elif stat == 'posts':
                # Posts affect the rating of the topic they belong to.
                # Topics that are not reviews have an empty `topic` and
                # must not queue a rating update for a non-existing guid.
                rating = self._rating['post']
                rating = rating.setdefault(this.resource['topic'], [0, 0])
            if rating is not None:
                rating[0] += shift
                rating[1] += shift * this.resource['vote']

        return result

    @route('GET', cmd='stats', arguments={
                'start': int, 'end': int, 'records': int, 'source': list},
            mime_type='application/json')
    def stats(self, start, end, records, source):
        """Return stored statistics for the specified time interval.

        :param start:
            beginning of the interval; if not set, `self._rrd.first`
        :param end:
            end of the interval; if not set, `self._rrd.last`
        :param records:
            desired number of records; values above `_MAX_STAT_RECORDS`
            are decreased to it, non-positive ones are replaced by
            a tenth of it
        :param source:
            if set, only these fields are kept in every record
        :returns:
            list of `(timestamp, values)` tuples

        `enforce()` fails the request if statistics are disabled.

        """
        enforce(self._rrd is not None, 'Statistics disabled')

        if not start:
            start = self._rrd.first or 0
        if not end:
            end = self._rrd.last or 0
        if records > _MAX_STAT_RECORDS:
            _logger.debug('Decrease %d stats records number to %d',
                    records, _MAX_STAT_RECORDS)
            records = _MAX_STAT_RECORDS
        elif records <= 0:
            records = _MAX_STAT_RECORDS / 10
        # Never ask the database for a resolution below one
        resolution = max(1, (end - start) / records)

        result = []
        for ts, values in self._rrd.get(start, end, resolution):
            if source:
                values = dict((i, values[i]) for i in source)
            result.append((ts, values))
        return result

    def stats_auto_commit(self):
        """Commit statistics once per database step, forever.

        The function never returns.

        """
        while True:
            coroutine.sleep(self._rrd.step)
            self.stats_commit()

    def stats_commit(self, timestamp=None):
        """Flush in-memory statistics and pending rating changes.

        Does nothing if no counter was changed since the previous commit.

        :param timestamp:
            passed to `Rrd.put()` along with the current values

        """
        if not self._stated:
            return
        self._stated = False

        _logger.trace('Commit stats')

        self._rrd.put(self._stats, timestamp)
        # ABSOLUTE fields count events per commit, restart them
        for field, traits in _DS.items():
            if traits['type'] == 'ABSOLUTE':
                self._stats[field] = 0

        for resource, stats_ in self._rating.items():
            directory = this.volume[resource]
            # Deltas are stored as [number of posts, vote sum],
            # see `stat_on_postroute()`
            for guid, (count, votes) in stats_.items():
                rating = directory[guid]['rating']
                directory.update(guid, {
                    'rating': [rating[0] + count, rating[1] + votes],
                    })
            stats_.clear()

    def stats_regen(self, path, step, rras):
        """Recreate the statistics database from the volume content.

        Existing database files are removed; then, starting from the
        `ctime` of the oldest context, creation of every user, context,
        post and report is replayed as a POST request and committed per
        time slot. Ratings are not touched.

        If the volume has no contexts, the database is left empty.

        :param path:
            database location, passed to `Rrd` as is
        :param step:
            step interval, in seconds
        :param rras:
            list of RRA definitions

        """
        for i in Rrd(path, 'stats', _DS, step, rras).files:
            os.unlink(i)
        self.stats_init(path, step, rras)
        # Start from scratch, GAUGE values will be accumulated while replaying
        for field in self._stats:
            self._stats[field] = 0

        def timeline(ts):
            # Yield `(left, right, step)` slots from `ts` until now, using
            # the step of the finest archive that covers the slot
            ts = long(ts)
            end = long(time.time())
            step_ = None

            # Map the oldest timestamp an archive covers to its row
            # duration in seconds; for equal starts, keep the finest one
            archives = {}
            for rra in rras:
                a_step, a_size = [long(i) for i in rra.split(':')[-2:]]
                a_step *= step
                a_start = end - min(end, a_step * a_size)
                if archives.setdefault(a_start, a_step) > a_step:
                    archives[a_start] = a_step
            archives = list(sorted(archives.items()))

            while ts <= end:
                # Switch to the next archive as soon as `ts` reaches it
                while not step_ or archives and ts >= archives[0][0]:
                    archive_start, step_ = archives.pop(0)
                    ts = max(ts / step_ * step_, archive_start)
                yield ts, ts + step_ - 1, step_
                ts += step_

        items, __ = this.volume['context'].find(limit=1, order_by='ctime')
        oldest = next(items, None)
        if oldest is None:
            # Avoid leaking StopIteration on an empty volume
            _logger.info('No contexts found, nothing to regenerate')
            return
        start = oldest['ctime']
        for left, right, __ in timeline(start):
            for resource in ('user', 'context', 'post', 'report'):
                items, __ = this.volume[resource].find(
                        query='ctime:%s..%s' % (left, right))
                for this.resource in items:
                    this.request = Request(method='POST', path=[resource])
                    # Ratings are stored in the volume, do not count twice
                    self.stat_on_postroute(None, None, False)
            # Commit in the middle of the slot
            self.stats_commit(left + (right - left) / 2)

    def stats_regen_rating(self, path, step, rras):
        """Recalculate `rating` of all contexts and topics from posts.

        The rating of a context is calculated from its reviews, the
        rating of a topic from the posts that belong to it.

        The `path`, `step` and `rras` arguments are not used.

        """

        def calc_rating(**kwargs):
            # Return [number of posts, sum of their votes] for found posts
            rating = [0, 0]
            alldocs, __ = this.volume['post'].find(**kwargs)
            for post in alldocs:
                rating[0] += 1
                rating[1] += post['vote']
            return rating

        alldocs, __ = this.volume['context'].find()
        for context in alldocs:
            rating = calc_rating(type='review', context=context.guid)
            this.volume['context'].update(context.guid, {'rating': rating})

        alldocs, __ = this.volume['post'].find(topic='')
        for topic in alldocs:
            rating = calc_rating(topic=topic.guid)
            this.volume['post'].update(topic.guid, {'rating': rating})