From 4b23ef459808486c6c82e6b042f4e09c5f4fd0d1 Mon Sep 17 00:00:00 2001
From: Pavel
Date: Wed, 19 Jun 2024 17:29:20 +0300
Subject: [PATCH] reworked

---
Reviewer notes (ignored by git am, not part of the commit message):
- getFunnel now runs utils.BinarySearch on sorted copies of each path: the
  query does not order the ids inside groupArray, so the raw arrays do not
  satisfy the ascending-order precondition of a binary search.
- BinarySearch documents that precondition and computes the midpoint as
  left + (right-left)/2.
- The commit id on the first line and the blob ids on the "index" lines
  predate these review edits.
- Line structure was restored from a whitespace-collapsed copy; indentation
  is assumed to be tabs. If hunks fail to match, try
  git apply --ignore-whitespace.

 repository/statistics/click_statistics.go | 82 +++++++++++++++--------
 utils/binary_search.go                    | 23 +++++++
 2 files changed, 77 insertions(+), 28 deletions(-)
 create mode 100644 utils/binary_search.go

diff --git a/repository/statistics/click_statistics.go b/repository/statistics/click_statistics.go
index a786ea4..76c980c 100644
--- a/repository/statistics/click_statistics.go
+++ b/repository/statistics/click_statistics.go
@@ -4,6 +4,8 @@ import (
 	"context"
 	"database/sql"
 	"fmt"
+	"penahub.gitlab.yandexcloud.net/backend/quiz/common.git/utils"
+	"sort"
 	"strings"
 )
 
@@ -59,7 +61,6 @@ type Statistic struct {
 
 type PipeLineStatsResp [][]Statistic
 
-// todo нужно исключить множества из подмножеств, пока что получаются все уникальные векторы респондентов по опросу
 // пример:
 //"[0, 116783, 116810]"
 //"[0, 116783, 116798]"
@@ -69,27 +70,18 @@ type PipeLineStatsResp [][]Statistic
 //"[0, 116783]"
 //"[0, 116783, 116810, 116843]"
 
-//SELECT DISTINCT final FROM (
-//SELECT groupArray(ctxquestionid) AS final FROM (SELECT DISTINCT f.ctxsession, a.ctxquestionid
-//FROM (SELECT ctxsession, max(event_time) AS max_time
-//FROM statistics WHERE ctxquizid = 26276 GROUP BY ctxsession ) f
-//JOIN ( SELECT ctxsession, ctxquestionid, event_time
-//FROM statistics WHERE ctxquizid = 26276 ) a ON f.ctxsession = a.ctxsession)
-//GROUP BY ctxsession);
+//SELECT DISTINCT last_que, reversed
+//FROM ( SELECT groupArray(ctxquestionid) AS reversed, arraySlice(arrayReverse(groupArray(ctxquestionid)), 1, 1)[1] AS last_que
+//FROM statistics WHERE ctxquizid = 26276 GROUP BY ctxsession ) AS sub;
 
-func (s *StatisticClick) getFunnel(ctx context.Context, quizID int64, from uint64, to uint64) (map[int64][]string, error) {
-	// берем из матвью все что принадлежит quizID в указанном интервале времени
-	// выбираем самыые поздние по роу набер - 1
-	// группируем по ид вопроса и для каждого ид вопроса формируем массив сессий которые были последнимим для этого вопроса
-	// выбираем только те где длина массива = 1
+// getFunnel loads, for quizID within [from, to], the distinct per-session
+// question paths keyed by each path's last question, and returns only those
+// whose last question does not occur in a path stored under another key.
+func (s *StatisticClick) getFunnel(ctx context.Context, quizID int64, from uint64, to uint64) (map[int64][]int64, error) {
 	query := `
-		SELECT ctxquestionid, arrayJoin(endsession) AS session
-		FROM (SELECT ctxquestionid, groupArray(ctxsession) AS endsession
-		FROM (SELECT ctxsession,ctxquestionid,row_number() OVER (PARTITION BY ctxsession ORDER BY event_time DESC) AS row_num
-		FROM mv_last_answers_events WHERE ctxquizid = ? AND event_time BETWEEN ? AND ?
-		) AS rows
-		WHERE row_num = 1 GROUP BY ctxquestionid
-		) AS group_sessions WHERE length(endsession) = 1;
+		SELECT DISTINCT last_que, reversed
+		FROM ( SELECT groupArray(ctxquestionid) AS reversed, arraySlice(arrayReverse(groupArray(ctxquestionid)), 1, 1)[1] AS last_que
+		FROM statistics WHERE ctxquizid = ? AND event_time BETWEEN ? AND ? GROUP BY ctxsession ) AS sub;
 	`
 
 	rows, err := s.conn.QueryContext(ctx, query, quizID, from, to)
@@ -98,23 +90,57 @@ func (s *StatisticClick) getFunnel(ctx context.Context, quizID int64, from uint6
 	}
 	defer rows.Close()
 
-	funnel := make(map[int64][]string)
-
+	funnel := make(map[int64][]int64)
 	for rows.Next() {
-		var questionID int64
-		var sessionID string
-		err := rows.Scan(&questionID, &sessionID)
-		if err != nil {
+		var lastQue int64
+		var reversed []int64
+		if err := rows.Scan(&lastQue, &reversed); err != nil {
 			return nil, err
 		}
-		funnel[questionID] = append(funnel[questionID], sessionID)
+		// NOTE(review): rows sharing last_que overwrite each other here; confirm that keeping one path per key is intended.
+		funnel[lastQue] = reversed
 	}
 
 	if err := rows.Err(); err != nil {
 		return nil, err
 	}
 
-	return funnel, nil
+	result := make(map[int64][]int64)
+	keys := make([]int64, 0, len(funnel))
+	// utils.BinarySearch needs ascending input, but the query does not order
+	// the ids inside groupArray, so membership is checked on sorted copies.
+	sortedPaths := make(map[int64][]int64, len(funnel))
+	for key, path := range funnel {
+		keys = append(keys, key)
+		sorted := append([]int64(nil), path...)
+		sort.Slice(sorted, func(i, j int) bool {
+			return sorted[i] < sorted[j]
+		})
+		sortedPaths[key] = sorted
+	}
+	sort.Slice(keys, func(i, j int) bool {
+		return keys[i] < keys[j]
+	})
+
+	for _, lastQue := range keys {
+		reversed := funnel[lastQue]
+		found := false
+
+		for _, otherLastQue := range keys {
+			if otherLastQue != lastQue {
+				if utils.BinarySearch(lastQue, sortedPaths[otherLastQue]) {
+					found = true
+					break
+				}
+			}
+		}
+
+		if !found {
+			result[lastQue] = reversed
+		}
+	}
+
+	return result, nil
 }
 
 func (s *StatisticClick) GetPipelinesStatistics(ctx context.Context, quizID int64, from uint64, to uint64) (PipeLineStatsResp, error) {
diff --git a/utils/binary_search.go b/utils/binary_search.go
new file mode 100644
index 0000000..8e91039
--- /dev/null
+++ b/utils/binary_search.go
@@ -0,0 +1,23 @@
+package utils
+
+// BinarySearch reports whether target occurs in array.
+// array must be sorted in ascending order; the result is unspecified otherwise.
+func BinarySearch(target int64, array []int64) bool {
+	left := 0
+	right := len(array) - 1
+
+	for left <= right {
+		// left + (right-left)/2 keeps the midpoint from overflowing int.
+		mid := left + (right-left)/2
+
+		if array[mid] == target {
+			return true
+		} else if array[mid] < target {
+			left = mid + 1
+		} else if array[mid] > target {
+			right = mid - 1
+		}
+	}
+
+	return false
+}