-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathscratchpad.py
More file actions
107 lines (82 loc) · 2.96 KB
/
scratchpad.py
File metadata and controls
107 lines (82 loc) · 2.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from batchalign import *
import json
from glob import glob
from pathlib import Path
from rich.console import Console
import copy
import os
import logging as L
# Layout of each log record: timestamp, logger name, severity, message.
LOG_FORMAT = "[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s"

# Root logger only reports errors by default.
L.basicConfig(level=L.ERROR, format=LOG_FORMAT)

# Per-library overrides: stanza stays at ERROR, batchalign itself is
# switched to DEBUG so its internals are visible while experimenting.
for _logger_name, _logger_level in (
    ("stanza", L.ERROR),
    ("batchalign", L.DEBUG),
):
    L.getLogger(_logger_name).setLevel(_logger_level)
########
from batchalign import *
from batchalign.formats.chat import CHATFile
# Parse the short sample transcript and keep the Document it exposes.
sample_path = "../talkbank-alignment/output/short.cha"
doc = CHATFile(path=sample_path).doc
doc  # bare expression: no effect when run as a script (REPL-style inspection)

# Alternative input kept for reference:
# from batchalign.formats.chat.parser import chat_parse_utterance
# from batchalign.formats.chat import CHATFile
# f = CHATFile(path="../talkbank-alignment/testing_playground/input/test.cha")
# doc = f.doc

# Overwrite the `time` of two content items of doc[1].
# NOTE(review): the first tuple is (4, 2), i.e. its first value is larger
# than its second -- presumably a deliberately out-of-order timing used to
# probe how generation copes with it; confirm before relying on it.
doc[1].content[0].time = (4, 2)
doc[1].content[2].time = (12425, 12825)
# doc[3].content[4].time = (14425, 14525)

# Wrap the modified Document again and render it. The `_CHATFile__*` names
# are the name-mangled forms of CHATFile's double-underscore members, so
# this reaches into the class's private generator directly.
f = CHATFile(doc=doc)
rendered = f._CHATFile__generate(
    f._CHATFile__doc,
    f._CHATFile__special_mor,
    write_wor=True,
)
print(rendered)
# doc = Document.new("I've been feeling like a rap god")
# doc[0][0]
# # 1+1
# text = "Hello are you the f b i ?"
# langs = ["eng"]
# parsed, delim = chat_parse_utterance(text, None, None, None, None)
# ut = Utterance(content=parsed, delim=delim, text=text)
# doc = Document(content=[ut], langs=langs)
# # pipe = BatchalignPipeline.new("morphosyntax", lang="jpn")
# # doc_out = pipe(doc, retokenize=True)
# cf = CHATFile(path="/Users/houjun/Documents/Projects/talkbank-alignment/comma/mm.cha")
# print(str(cf))
# print(str())
# 1+1
# doc = CHATFile(path="../talkbank-alignment/input/011116.cha").doc
# newdoc = Document(content=[doc[4]], langs=["heb"])
# pipe = StanzaEngine()
# res = pipe(newdoc)
# print(CHATFile(doc=res))
# ours = BatchalignPipeline.new("asr", lang="eng", asr="rev")
# doc = Document.new(media_path="../talkbank-alignment/input/test.mp3", lang="eng")
# doc = ours(doc)
# CHATFile(doc=doc).write("../talkbank-alignment/input/test.cha")
# from pyannote.audio import Pipeline
# pipe = Pipeline.from_pretrained("talkbank/dia-fork")
# res = pipe("../talkbank-alignment/input/test.mp3", num_speakers=2)
# speakers = list(set([int(i[-1].split("_")[-1])
# for i in res.itertracks(yield_label=True)]))
# corpus = doc.tiers[0].corpus
# lang = doc.tiers[0].lang
# tiers = {
# i:
# Tier(
# lang=lang, corpus=corpus,
# id="PAR"+str(i), name="Participant",
# birthday="",
# )
# for i in speakers
# }
# for i in doc.content:
# if not isinstance(i, Utterance):
# continue
# if i.alignment is None:
# continue
# start,end = i.alignment
# if start is None or end is None:
# continue
# for (a,b),_,speaker in res.itertracks(yield_label=True):
# speaker_id = int(speaker.split("_")[-1])
# tier = tiers.get(speaker_id)
# # we set the end time of the utterance as the
# # *LAST* segment it ends before
# # i.e. [seg_end, ....., ut_end]
# # like that
# if b <= end/1000 and tier:
# i.tier = tier