Post

SQL Dialogflow

SQL Query for dialogflow monitoring wuith Google Cloud

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
WITH sources AS (
  SELECT * FROM  `PROJECT_ID.northamerica-northeast1.dialogflow._AllLogs`
),

logs_bucket AS (
  SELECT
    severity,
    timestamp,
    EXTRACT(MINUTE FROM timestamp) AS minute,
    EXTRACT(HOUR FROM timestamp) AS hour,
    EXTRACT(DAY FROM timestamp) AS day,
    EXTRACT(MONTH FROM timestamp) AS month,
    EXTRACT(YEAR FROM timestamp) AS year,
    insert_id,
    log_id,
    log_name,
    JSON_VALUE(resource.labels.project_id) AS project_id,
    labels,
    text_payload,
    proto_payload,
    json_payload,
  FROM sources
),

es_logs AS (
  SELECT
    *,
    JSON_VALUE(labels.request_id) as intent_response_id,
    REGEXP_EXTRACT(text_payload, r'session_id: "([^"]+)"') AS session_id,
    REGEXP_EXTRACT(text_payload, r'lang: "([^"]+)"') AS language_code,
    REGEXP_EXTRACT(text_payload, r'intent_name: "([^"]+)"') AS intent_name,
	  REGEXP_EXTRACT(text_payload, r'resolved_query: "([^"]+)"') AS userText,
	  REGEXP_EXTRACT(text_payload, r'speech: "([^"]+)"') AS responseText,
    ROW_NUMBER() OVER (
      PARTITION BY 
        REGEXP_EXTRACT(text_payload, r'session_id: "([^"]+)"'), 
        REGEXP_EXTRACT(text_payload, r'intent_name: "([^"]+)"'), 
        TIMESTAMP_TRUNC(timestamp, MINUTE) 
        ORDER BY timestamp
    ) AS row_num
  FROM logs_bucket
  WHERE
    NORMALIZE_AND_CASEFOLD(severity , NFKC) = "info"
    AND NORMALIZE_AND_CASEFOLD(log_name , NFKC) = "projects/mega-agent-xllj/logs/dialogflow_agent"
    AND NORMALIZE_AND_CASEFOLD(SAFE.STRING(labels ["type"]), NFKC) = "dialogflow_response"
    AND NORMALIZE_AND_CASEFOLD(REGEXP_EXTRACT(text_payload, r'intent_name: "([^"]+)"'), NFKC) != "default welcome intent"
    AND DATE(timestamp) NOT IN (DATE '2024-07-04', DATE '2024-07-15', DATE '2024-07-16') --Date to exclude
),

es_logs_filtered AS (
    SELECT *
    FROM es_logs
    WHERE row_num = 1
),

es_request_logs AS (
  SELECT
    *,
    JSON_VALUE(json_payload.responseId) as intent_response_id,
    JSON_VALUE(json_payload.originalDetectIntentRequest.payload.applicationId) as application_id,
    JSON_VALUE(json_payload.originalDetectIntentRequest.payload.botId) as bot_id,
    TRIM(JSON_VALUE(json_payload.originalDetectIntentRequest.payload.firstName)) as first_name,
    JSON_VALUE(json_payload.originalDetectIntentRequest.payload.from) as from_user,
    JSON_VALUE(json_payload.originalDetectIntentRequest.payload.groupId) as session_id,
  FROM logs_bucket
  WHERE
    log_name="projects/mega-agent-xllj/logs/run.googleapis.com%2Fstdout"
    AND json_payload.originalDetectIntentRequest.payload IS NOT NULL
),

es_resquest_response_logs_join as (
  SELECT 
    b.timestamp as timestamp,
    b.severity,
    b.log_name,
    b.session_id,
    b.intent_response_id,
    a.insert_id as insert_id_a,
    b.insert_id as insert_id_b,
    a.application_id,
    a.bot_id,
    a.first_name,
    a.from_user,
    b.intent_name as response_intent,
    b.userText,
    b.responseText,
    b.language_code,
    b.text_payload,
    ROW_NUMBER() OVER (PARTITION BY b.insert_id ORDER BY b.timestamp DESC) AS row_num
  FROM es_request_logs a
  LEFT JOIN es_logs_filtered b
    ON a.intent_response_id = b.intent_response_id
    AND a.timestamp <= b.timestamp  -- Prevent duplicate pairs
    AND a.session_id = b.session_id
),

cx_logs AS (
  SELECT
    *,
    JSON_VALUE(labels.agent_id) as agent_id,
    JSON_VALUE(labels.session_id) as session_id,
    JSON_VALUE(json_payload.responseId) as intent_response_id,
    JSON_VALUE(json_payload.queryResult.match.matchType) as matchType,
    JSON_VALUE(json_payload.queryResult.match.intent.displayName) as intent_name,
    JSON_VALUE(json_payload.queryResult.match.event) as event_name,
    JSON_VALUE(json_payload.queryResult.generativeInfo.actionTracingInfo.actions[0].userUtterance.text) as userText,
    JSON_VALUE(json_payload.queryResult.generativeInfo.actionTracingInfo.actions[1].agentUtterance.text) as responseText,
    JSON_VALUE(json_payload.queryResult.languageCode) as language_code,
  FROM logs_bucket
  WHERE
    log_name="projects/mega-agent-xllj/logs/dialogflow-runtime.googleapis.com%2Frequests"
    AND json_payload.queryResult.match IS NOT NULL
),

cx_logs_filtered AS (
  SELECT *
  FROM cx_logs
  WHERE
    agent_id = "DIALOGFLOW_CX_AGENT_ID"
    AND NOT (
      (intent_name IS NOT DISTINCT FROM "Default Welcome Intent")
      OR (event_name IS NOT DISTINCT FROM "welcome_sti")
    )
),

cx_resquest_logs as (
  SELECT 
    *,
    JSON_VALUE(labels.agent_id) as agent_id,
    JSON_VALUE(labels.session_id) as session_id,
    JSON_VALUE(json_payload.fulfillmentInfo.tag) as tag,
    JSON_VALUE(json_payload.detectIntentResponseId) as intent_response_id,
    JSON_VALUE(json_payload.payload.applicationId) as application_id,
    JSON_VALUE(json_payload.payload.botId) as bot_id,
    JSON_VALUE(json_payload.payload.firstName) as first_name,
    JSON_VALUE(json_payload.payload.from) as from_user,
    JSON_VALUE(json_payload.payload.groupId) as group_id,
    JSON_VALUE(json_payload.triggerEvent) as triggerEvent,
    JSON_VALUE(json_payload.text) as text
  FROM logs_bucket
  WHERE 
    log_name = "projects/mega-agent-xllj/logs/dialogflow-runtime.googleapis.com%2Frequests"
    AND JSON_VALUE(labels.agent_id) = "DIALOGFLOW_CX_AGENT_ID"
    AND json_payload.payload IS NOT NULL
),

cx_resquest_response_logs_join as (
  SELECT 
    a.timestamp as timestamp,
    a.severity,
    a.log_name,
    a.session_id,
    a.intent_response_id,
    a.insert_id as insert_id_a,
    b.insert_id as insert_id_b,
    a.application_id,
    a.bot_id,
    a.first_name,
    a.from_user,
    a.tag as request_tag,
    a.triggerEvent as request_event,
    a.text as request_text,
    b.matchType as response_match,
    b.intent_name as response_intent,
    b.event_name as response_event,
    b.userText,
    b.responseText,
    b.language_code,
    b.json_payload,
    ROW_NUMBER() OVER (PARTITION BY a.insert_id ORDER BY a.timestamp DESC) AS row_num
  FROM cx_resquest_logs a
  LEFT JOIN cx_logs_filtered b
    ON a.intent_response_id = b.intent_response_id
    AND a.timestamp <= b.timestamp  -- Prevent duplicate pairs
    AND a.session_id = b.session_id
),

combined_response_logs AS (
  SELECT 
    'ES' AS source_data,
    severity,
    timestamp,
    minute,
    hour,
    day,
    month,
    year,
    insert_id,
    log_id,
    log_name,
    session_id,
    language_code,
    intent_name,
    userText,
    responseText,
    text_payload,
    NULL as json_payload,
  FROM es_logs_filtered

  UNION ALL

  SELECT 
    'CX' AS source_data,
    severity,
    timestamp,
    minute,
    hour,
    day,
    month,
    year,
    insert_id,
    log_id,
    log_name,
    session_id,
    language_code,
    intent_name,
    userText,
    responseText,
    NULL as text_payload,
    json_payload
  FROM cx_logs_filtered
),

combined_response_logs_null AS (
  SELECT
    'ES' AS source_data, 
    severity,
    timestamp,
    minute,
    hour,
    day,
    month,
    year,
    insert_id,
    log_id,
    log_name,
    NULL AS project_id,  -- Not present in ES logs
    NULL AS agent_id,     -- Not present in ES logs
    session_id,
    NULL AS matchType,    -- Not present in ES logs
    intent_name,
    NULL AS event_name,   -- Not present in ES logs
    userText,
    responseText,
    language_code,
    text_payload,
    NULL as json_payload,
  FROM es_logs_filtered

  UNION ALL 

  SELECT
    'CX' AS source_data,
    severity,
    timestamp,
    minute,
    hour,
    day,
    month,
    year,
    insert_id,
    log_id,
    log_name,
    project_id,
    agent_id,
    session_id,
    matchType,
    intent_name,
    event_name,
    userText,
    responseText,
    language_code,
    NULL as text_payload,
    json_payload
  FROM cx_logs_filtered
),

combined_request_logs AS (
  SELECT
    'ES' AS source_data, 
    severity,
    timestamp,
    EXTRACT(MINUTE FROM timestamp) AS minute,
    EXTRACT(HOUR FROM timestamp) AS hour,
    EXTRACT(DAY FROM timestamp) AS day,
    EXTRACT(MONTH FROM timestamp) AS month,
    EXTRACT(YEAR FROM timestamp) AS year,
    session_id,
    intent_response_id,
    insert_id_a,
    insert_id_b,
    application_id,
    bot_id,
    first_name,
    from_user,
    NULL AS request_tag,
    NULL AS request_event,
    NULL AS request_text,
    NULL AS response_match,
    NULL AS response_event,
    response_intent,
    userText,
    responseText,
    language_code,
    text_payload,
    NULL as json_payload,
  FROM es_resquest_response_logs_join
  WHERE row_num = 1

  UNION ALL 

  SELECT
    'CX' AS source_data,
    severity,
    timestamp,
    EXTRACT(MINUTE FROM timestamp) AS minute,
    EXTRACT(HOUR FROM timestamp) AS hour,
    EXTRACT(DAY FROM timestamp) AS day,
    EXTRACT(MONTH FROM timestamp) AS month,
    EXTRACT(YEAR FROM timestamp) AS year,
    session_id,
    intent_response_id,
    insert_id_a,
    insert_id_b,
    application_id,
    bot_id,
    first_name,
    from_user,
    request_tag,
    request_event,
    request_text,
    response_match,
    response_event,
    response_intent,
    userText,
    responseText,
    language_code,
    NULL as text_payload,
    json_payload
  FROM cx_resquest_response_logs_join
  WHERE row_num = 1
)

SELECT
  #ROW_NUMBER() OVER() AS order_id,
  FORMAT_TIMESTAMP('%Y-%m', TIMESTAMP_TRUNC(timestamp, MONTH)) AS month,
  COUNT(month) AS total_requests
FROM combined_response_logs
GROUP BY month
ORDER BY month
This post is licensed under CC BY 4.0 by the author.