added reworkerd clickhouse stats query

This commit is contained in:
Pavel 2024-06-30 11:42:21 +03:00
parent 89234a64c7
commit aae2b1be77

@ -4,9 +4,6 @@ import (
"context"
"database/sql"
"fmt"
"penahub.gitlab.yandexcloud.net/backend/quiz/common.git/utils"
"sort"
"strings"
)
type DepsClick struct {
@ -55,24 +52,15 @@ type Statistic struct {
type PipeLineStatsResp map[int64][]Statistic
// пример:
//"[0, 116783, 116810]"
//"[0, 116783, 116798]"
//"[0, 116783, 116798, 116831]"
//"[0, 116783, 116810, 116849]"
//[0]
//"[0, 116783]"
//"[0, 116783, 116810, 116843]"
func (s *StatisticClick) GetPipelinesStatistics(ctx context.Context, quizID int64, from uint64, to uint64) (PipeLineStatsResp, error) {
pipelines := make(PipeLineStatsResp)
//SELECT DISTINCT last_que, reversed
//FROM ( SELECT groupArray(ctxquestionid) AS reversed, arraySlice(arrayReverse(groupArray(ctxquestionid)), 1, 1)[1] AS last_que
//FROM statistics WHERE ctxquizid = 26276 GROUP BY ctxsession ) AS sub;
func (s *StatisticClick) getFunnel(ctx context.Context, quizID int64, from uint64, to uint64) (map[int64][]int64, error) {
query := `
SELECT DISTINCT last_que, reversed
FROM ( SELECT groupUniqArray(ctxquestionid) AS reversed, arraySlice(arrayReverse(groupArray(ctxquestionid)), 1, 1)[1] AS last_que
FROM mv_last_answers_events WHERE ctxquizid = ? AND event_time BETWEEN ? AND ? GROUP BY ctxsession ) AS sub;
select last_quesion, questions, count() from (select last_quesion, questions, target_quiz
from (select last_quesion, questions, target_quiz from view_pipelines_signs
join view_respondent_paths on long_sess = session array join questions)
as pipelines join mv_answers on questions=questionid and quizid=target_quiz
where target_quiz = ? AND event_time BETWEEN ? AND ? ) group by last_quesion, questions;
`
rows, err := s.conn.QueryContext(ctx, query, quizID, from, to)
@ -81,119 +69,26 @@ func (s *StatisticClick) getFunnel(ctx context.Context, quizID int64, from uint6
}
defer rows.Close()
funnel := make(map[int64][]int64)
for rows.Next() {
var lastQue int64
var reversed []int64
if err := rows.Scan(&lastQue, &reversed); err != nil {
return nil, err
}
funnel[lastQue] = reversed
}
if err := rows.Err(); err != nil {
return nil, err
}
result := make(map[int64][]int64)
keys := make([]int64, 0, len(funnel))
for key := range funnel {
keys = append(keys, key)
}
sort.Slice(keys, func(i, j int) bool {
return keys[i] < keys[j]
})
for _, lastQue := range keys {
reversed := funnel[lastQue]
found := false
for _, otherLastQue := range keys {
if otherLastQue != lastQue {
otherReversed := funnel[otherLastQue]
sort.Slice(otherReversed, func(i, j int) bool {
return otherReversed[i] < otherReversed[j]
})
index := utils.BinarySearch(lastQue, otherReversed)
if index {
found = true
break
}
}
}
if !found {
result[lastQue] = reversed
}
}
return result, nil
}
func (s *StatisticClick) GetPipelinesStatistics(ctx context.Context, quizID int64, from uint64, to uint64) (PipeLineStatsResp, error) {
pipelines := make(PipeLineStatsResp)
funnel, err := s.getFunnel(ctx, quizID, from, to)
if err != nil {
return nil, err
}
for lastQue, idS := range funnel {
sesCount, err := s.countSession(ctx, quizID, from, to, idS)
if err != nil {
return nil, err
}
for _, queID := range idS {
if sessionCount, ok := sesCount[queID]; ok {
pipeline := Statistic{
QuestionID: queID,
Count: sessionCount,
}
pipelines[lastQue] = append(pipelines[lastQue], pipeline)
}
}
}
return pipelines, nil
}
func (s *StatisticClick) countSession(ctx context.Context, quizID int64, from uint64, to uint64, questionIDs []int64) (map[int64]int64, error) {
placeholders := make([]string, len(questionIDs))
args := make([]interface{}, len(questionIDs)+3)
args[0] = quizID
for i, id := range questionIDs {
placeholders[i] = "?"
args[i+1] = id
}
args[len(args)-2] = from
args[len(args)-1] = to
query := fmt.Sprintf(`
SELECT ctxquestionid, COUNT(DISTINCT ctxsession) AS session_count
FROM statistics
WHERE ctxquizid = ? AND ctxquestionid IN (%s) AND event_time BETWEEN ? AND ?
GROUP BY ctxquestionid;
`, strings.Join(placeholders, ","))
rows, err := s.conn.QueryContext(ctx, query, args...)
if err != nil {
return nil, err
}
defer rows.Close()
counts := make(map[int64]int64)
for rows.Next() {
var lastQuestionID int64
var questionID int64
var count int64
err := rows.Scan(&questionID, &count)
if err != nil {
if err := rows.Scan(&lastQuestionID, &questionID, &count); err != nil {
return nil, err
}
counts[questionID] = count
stat := Statistic{
Count: count,
QuestionID: questionID,
}
pipelines[lastQuestionID] = append(pipelines[lastQuestionID], stat)
}
if err := rows.Err(); err != nil {
return nil, err
}
return counts, nil
return pipelines, nil
}