villesau / whisper-timestamped

Transcribes audio using Whisper Large V3 with precise word-level timestamps and confidence scores.

  • Public
  • 5.1K runs
  • L40S
  • GitHub
  • Weights
  • License
  • Prediction

    villesau/whisper-timestamped:c5b122b7e513b1b5a6ef849891c538869b77cc932cbd0f8203e11d3b357553b8
    ID
    ynch60wav5rgp0cjazcstgbx9m
    Status
    Succeeded
    Source
    Web
    Hardware
    A40
    Total duration
    Created
    by @villesau

    Input

    vad
    task
    transcribe
    verbose
    language
    auto
    temperature
    0
    suppress_tokens
    -1
    logprob_threshold
    -1
    detect_disfluencies
    no_speech_threshold
    0.6
    compute_word_confidence
    condition_on_previous_text
    compression_ratio_threshold
    2.4

    Output

    { "text": " That is exactly what happened.", "language": "en", "segments": [ { "id": 0, "end": 2.19, "seek": 0, "text": " That is exactly what happened.", "start": 0.57, "words": [ { "end": 0.79, "text": "That", "start": 0.57, "confidence": 0.83 }, { "end": 1.01, "text": "is", "start": 0.79, "confidence": 0.984 }, { "end": 1.53, "text": "exactly", "start": 1.01, "confidence": 0.994 }, { "end": 1.79, "text": "what", "start": 1.53, "confidence": 0.998 }, { "end": 2.19, "text": "happened.", "start": 1.79, "confidence": 0.995 } ], "tokens": [ 50365, 663, 307, 2293, 437, 2011, 13, 50474 ], "confidence": 0.958, "avg_logprob": -0.34747041596306694, "temperature": 0, "no_speech_prob": 0.04611295834183693, "compression_ratio": 0.8571428571428571 } ], "language_probs": { "af": 0.000002371588834648719, "am": 6.9843961725268855e-9, "ar": 0.00005482712367665954, "as": 2.9062825035452988e-8, "az": 2.945290873412887e-7, "ba": 9.690693802966166e-10, "be": 6.728683388246282e-7, "bg": 0.0000029399832328635966, "bn": 0.0000016882902400539024, "bo": 2.985835010349547e-7, "br": 0.000011138805348309688, "bs": 0.000002210563707194524, "ca": 0.000003383899866093998, "cs": 0.000012233585039211903, "cy": 0.0006274632178246975, "da": 0.000011764923328883015, "de": 0.00038960075471550226, "el": 0.000039800219383323565, "en": 0.9933839440345764, "es": 0.0010508032282814384, "et": 4.752749305225734e-7, "eu": 5.459707494992472e-7, "fa": 0.000005535690434044227, "fi": 0.00005612731183646247, "fo": 7.26124596894806e-7, "fr": 0.0002457181108184159, "gl": 0.0000023995444280444644, "gu": 5.464226049411991e-8, "ha": 2.30658558919572e-9, "he": 0.00001144758516602451, "hi": 0.000022501006242237054, "hr": 0.000006943310836504679, "ht": 0.0000010081241725856671, "hu": 0.00001402587349730311, "hy": 3.7086812199049746e-7, "id": 0.00002703558129724115, "is": 0.000002271842504342203, "it": 0.00009622459765523672, "ja": 0.00022026030637789518, "jw": 0.000040744060243014246, "ka": 3.7445172296202145e-8, "kk": 2.761446182830696e-7, "km": 0.000018221870050183497, "kn": 1.0984273757230767e-7, "ko": 0.00015618793258909136, "la": 0.00029871726292185485, "lb": 2.6137041420071228e-9, "ln": 1.9256308547710432e-8, "lo": 1.5950840293044166e-7, "lt": 0.0000011158916777276318, "lv": 0.000001728326878946973, "mg": 1.0437287700781894e-9, "mi": 0.000044748594518750906, "mk": 6.507966787694386e-8, "ml": 0.0000017418822153558722, "mn": 3.9248061511898413e-7, "mr": 2.4850325530678674e-7, "ms": 0.000025596773411962204, "mt": 8.413714880362022e-8, "my": 3.1413753731612815e-7, "ne": 3.6440593476072536e-7, "nl": 0.00017698411829769611, "nn": 0.0006081582396291196, "no": 0.000014756519703951199, "oc": 3.350514248268155e-7, "pa": 5.089006549496844e-7, "pl": 0.00009927909559337422, "ps": 2.4132847897817555e-7, "pt": 0.0005494251381605864, "ro": 0.00001627029814699199, "ru": 0.0010841591283679008, "sa": 0.000003081075647060061, "sd": 2.893970645345689e-7, "si": 0.000011996966350125149, "sk": 0.0000026046805032819975, "sl": 0.0000049043501348933205, "sn": 0.00001487225654273061, "so": 1.1598242366517297e-9, "sq": 9.222627284088958e-8, "sr": 4.499812291669514e-7, "su": 4.686813159793246e-9, "sv": 0.00007493978773709387, "sw": 0.0000033444757718825713, "ta": 0.00000844091937324265, "te": 0.0000026768848329083994, "tg": 2.1332433597365252e-9, "th": 0.000010546009434619918, "tk": 7.971348026103442e-10, "tl": 0.00009399555710842833, "tr": 0.000055257140047615394, "tt": 3.349514221540062e-9, "uk": 0.000009677562957222108, "ur": 0.000013073552509013098, "uz": 2.3152679773374985e-10, "vi": 0.000021979773009661585, "yi": 0.0000010943083452730207, "yo": 0.0000023623429115104955, "zh": 0.00008693167910678312, "haw": 0.0001264848542632535, "yue": 5.26081407770107e-7 }, "speech_activity": [ { "end": 2.8735, "start": 0.046 } ] }
    Generated in

Want to make some of these yourself?

Run this model