{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "http://uniphore.com/schemas/nlp/transcript.json",
"title": "Transcript",
"description": "ASR Transcript",
"type": "object",
"definitions": {
"transcript_turn": {
"description": "ASR Transcript Turn",
"type": "object",
"properties": {
"order": {
"description": "Order of the turn in the transcript.",
"type": "integer"
},
"speaker": {
"description": "Speaker diarisation output (+ identification)",
"type": "string"
},
"startOffset": {
"description": "Start offset time of turn in call",
"type": "integer"
},
"endOffset": {
"description": "End offset time of turn in call",
"type": "integer"
},
"words": {
"description": "List of tokens.",
"type": "array",
"minItems": 1,
"items": {
"type": "object",
"properties": {
"text": {
"type": "string"
},
"startOffset": {
"type": "integer"
},
"endOffset": {
"type": "integer"
},
"confidence": {
"type": "number"
},
"meta": {
"type": "object"
}
},
"required": [
"text",
"startOffset",
"endOffset"
]
}
},
"metaData": {
"description": "Turn metadata.",
"type": "object"
}
},
"required": [
"order",
"speaker",
"words"
]
}
},
"properties": {
"sessionId": {
"description": "Unique contact id",
"type": "string"
},
"turns": {
"description": "List of TranscriptTurns.",
"type": "array",
"items": {
"$ref": "#/definitions/transcript_turn"
}
},
"wsText": {
"description": "Words joined on whitespace. Use underscores for multiword expressions if required.",
"type": "array",
"items": {
"description": "words related to each turn in order",
"type": "string"
}
},
"isChunk": {
"description": "True if this is only a chunk of a transcript, false otherwise.",
"type": "boolean"
},
"isLastChunk": {
"description": "True if this is the last chunk of a transcript, false otherwise.",
"type": "boolean"
},
"lang": {
"description": "Language in ISO 639-1 format",
"type": "string"
},
"startTime": {
"description": "Contact start time, as UNIX epoch time.",
"type": "integer"
},
"metaData": {
"description": "Contact-level metadata.",
"type": "object",
"properties": {
"agentId": {
"description": "Agent identifier of a transcript.",
"type": "string"
},
"customerId": {
"description": "Customer identifier of a transcript.",
"type": "string"
},
"contactId": {
"description": "Contact identifier of a transcript, assigned by the platform.",
"type": "string"
},
"clientContactId": {
"description": "Client contact identifier of a transcript.",
"type": "string"
},
"journeyId": {
"description": "Journey identifier of a transcript.",
"type": "string"
},
"contactType": {
"description": "Contact type of a transcript (voice, chat, email).",
"type": "string"
},
"contactDuration": {
"description": "Contact duration of a transcript",
"type": "integer"
},
"audioType": {
"description": "Audio type of voice contact (stereo, mono).",
"type": "string"
},
"isDiarized": {
"description": "True if the audio is diarized, false otherwise",
"type": "boolean"
},
"tenantId": {
"description": "Tenant identifier of a transcript.",
"type": "string"
},
"organizationId": {
"description": "Organization identifier of a transcript.",
"type": "string"
},
"categoryId": {
"description": "Category (Business Process) identifier of a transcript.",
"type": "string"
},
"type": {
"description": "Type of a transcript (live or batch).",
"type": "string"
}
},
"required": [
"agentId",
"customerId",
"clientContactId",
"contactType",
"contactDuration",
"tenantId",
"organizationId",
"categoryId",
"type"
]
}
},
"required": [
"sessionId",
"startTime",
"isChunk",
"isLastChunk",
"turns",
"metaData"
]
}