Skip to main content

Uniphore Customer Portal

1. Transcript Schema
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "$id": "http://uniphore.com/schemas/nlp/transcript.json",
  "title": "Transcript",
  "description": "ASR Transcript",
  "type": "object",
  "definitions": {
    "transcript_turn": {
      "description": "ASR Transcript Turn",
      "type": "object",
      "properties": {
        "order": {
          "description": "Order of the turn in the transcript.",
          "type": "integer"
        },
        "speaker": {
          "description": "Speaker diarisation output (+ identification)",
          "type": "string"
        },
        "startOffset": {
          "description": "Start offset time of turn in call",
          "type": "integer"
        },
        "endOffset": {
          "description": "End offset time of turn in call",
          "type": "integer"
        },
        "words": {
          "description": "List of tokens.",
          "type": "array",
          "minItems": 1,
          "items": {
            "type": "object",
            "properties": {
              "text": {
                "type": "string"
              },
              "startOffset": {
                "type": "integer"
              },
              "endOffset": {
                "type": "integer"
              },
              "confidence": {
                "type": "number"
              },
              "meta": {
                "type": "object"
              }
            },
            "required": [
              "text",
              "startOffset",
              "endOffset"
            ]
          }
        },
        "metaData": {
          "description": "Turn metadata.",
          "type": "object"
        }
      },
      "required": [
        "order",
        "speaker",
        "words"
      ]
    }
  },
  "properties": {
    "sessionId": {
      "description": "Unique contact id",
      "type": "string"
    },
    "turns": {
      "description": "List of TranscriptTurns.",
      "type": "array",
      "items": {
        "$ref": "#/definitions/transcript_turn"
      }
    },
    "wsText": {
      "description": "Words joined on white space. Use underscore for multiwords expressions if required.",
      "type": "array",
      "items": {
        "description": "words related to each turn in order",
        "type": "string"
      }
    },
    "isChunk": {
      "description": "True if this is only a chunk of a transcript. False otherwise",
      "type": "boolean"
    },
    "isLastChunk": {
      "description": "True if this is the last chunk of a transcript. False otherwise",
      "type": "boolean"
    },
    "lang": {
      "description": "Language in ISO 639-1 format",
      "type": "string"
    },
    "startTime": {
      "description": "contact start time, UNIX Epoch time",
      "type": "integer"
    },
    "metaData": {
      "description": "contact level meta data.",
      "type": "object",
      "properties": {
        "agentId": {
          "description": "Agent identifier of a transcript.",
          "type": "string"
        },
        "customerId": {
          "description": "customer identifier of a transcript.",
          "type": "string"
        },
        "contactId": {
          "description": "Contact identifier of a transcript assinged by platform.",
          "type": "string"
        },
        "clientContactId": {
          "description": "Client contact identifier of a transcript.",
          "type": "string"
        },
        "journeyId": {
          "description": "Journey identifier of a transcript.",
          "type": "string"
        },
        "contactType": {
          "description": "Contact type of a transcript (voice, chat, email).",
          "type": "string"
        },
        "contactDuration": {
          "description": "Contact duration of a transcript",
          "type": "integer"
        },
        "audioType": {
          "description": "Audio type of voice contact (stereo, mono).",
          "type": "string"
        },
        "isDiarized": {
          "description": "True if the audio is diarized, false otherwise",
          "type": "boolean"
        },
        "tenantId": {
          "description": "Tenant identifier of a transcript.",
          "type": "string"
        },
        "organizationId": {
          "description": "organization identifier of a transcript.",
          "type": "string"
        },
        "categoryId": {
          "description": "Category (Business Process) identifier of a transcript.",
          "type": "string"
        },
        "type": {
          "description": "Type of a transcript (live or batch).",
          "type": "string"
        }
      },
      "required": [
        "agentId",
        "customerId",
        "clientContactId",
        "contactType",
        "contactDuration",
        "tenantId",
        "organizationId",
        "categoryId",
        "type"
      ]
    }
  },
  "required": [
    "sessionId",
    "startTime",
    "isChunk",
    "isLastChunk",
    "turns",
    "metaData"
  ]
}