Source code for speechmarkdown.formatters.microsoft_azure
import re
from typing import Any, Dict, List, Optional, Union
from speechmarkdown.formatters.data.microsoft_azure_voices import (
MICROSOFT_AZURE_ALL_VOICES,
)
from speechmarkdown.formatters.ssml_base import SsmlFormatterBase, TagsObject
from speechmarkdown.options import SpeechOptions
from speechmarkdown.parser import ASTNode
[docs]
class MicrosoftAzureSsmlFormatter(SsmlFormatterBase):
def __init__(self, options: SpeechOptions) -> None:
    """Configure the Azure-specific lookup tables on top of the base formatter.

    After delegating to the base-class initializer this sets ``valid_roles``,
    merges the Azure modifier-to-tag entries into
    ``modifier_key_to_ssml_tag_mappings`` and sets ``ssml_tag_sort_order``.

    Args:
        options: Formatter options, forwarded unchanged to the base class.
    """
    super().__init__(options)

    # Role names kept on the instance; nothing in this method consumes them.
    child_roles = ["Girl", "Boy"]
    adult_roles = [
        "YoungAdultFemale",
        "YoungAdultMale",
        "OlderAdultFemale",
        "OlderAdultMale",
        "SeniorFemale",
        "SeniorMale",
    ]
    self.valid_roles = child_roles + adult_roles

    say_as = "say-as"
    prosody = "prosody"
    express_as = "mstts:express-as"

    # Modifier key -> SSML element name. ``None`` means the key has no
    # element name associated with it here.
    tag_for_modifier: Dict[str, Optional[str]] = {
        "emphasis": "emphasis",
        "address": say_as,
        "number": say_as,
        "characters": say_as,
        "expletive": None,
        "fraction": say_as,
        "interjection": None,
        "ordinal": say_as,
        "telephone": say_as,
        "unit": None,
        "time": say_as,
        "date": say_as,
        "sub": "sub",
        "ipa": "phoneme",
        "rate": prosody,
        "pitch": prosody,
        "volume": prosody,
        "whisper": prosody,
        "voice": "voice",
        "lang": "lang",
    }

    # Every remaining key maps to the same element; the tuple keeps the
    # keys in the order they are added to the mapping.
    express_as_keys = (
        "style",
        "role",
        "newscaster",
        "excited",
        "disappointed",
        "friendly",
        "cheerful",
        "sad",
        "angry",
        "fearful",
        "empathetic",
        "calm",
        "lyrical",
        "hopeful",
        "terrified",
        "shouting",
        "whispering",
        "unfriendly",
        "gentle",
        "serious",
        "depressed",
        "embarrassed",
        "disgruntled",
        "envious",
        "affectionate",
        "assistant",
        "chat",
        "customerservice",
        "poetry-reading",
        "narration-professional",
        "narration-relaxed",
        "newscast-casual",
        "newscast-formal",
        "documentary-narration",
        "advertisement_upbeat",
        "sports_commentary",
        "sports_commentary_excited",
    )
    for modifier_key in express_as_keys:
        tag_for_modifier[modifier_key] = express_as

    self.modifier_key_to_ssml_tag_mappings.update(tag_for_modifier)

    # NOTE(review): presumably consulted by the base class when ordering
    # tags around a span of text -- confirm against SsmlFormatterBase.
    self.ssml_tag_sort_order = [
        "emphasis",
        express_as,
        say_as,
        prosody,
        "voice",
        "lang",
        "sub",
        "phoneme",
    ]
[docs]
def get_voice_tag_fallback(self, name: str) -> Optional[Dict[str, Any]]:
    """Return the attributes to use for a voice name, or ``None`` for "device".

    Args:
        name: Voice name as written in the source text.

    Returns:
        ``None`` when ``name`` equals "device" ignoring case, otherwise a
        one-entry dict ``{"name": name}`` carrying the name unchanged.
    """
    is_device_voice = name.lower() == "device"
    return None if is_device_voice else {"name": name}
[docs]
def contains_mstts_tag(self, lines: List[str]) -> bool:
    """Report whether any line contains an opening or closing ``mstts:`` tag.

    Args:
        lines: Rendered output fragments to scan.

    Returns:
        ``True`` as soon as one fragment contains ``<mstts:`` or
        ``</mstts:`` anywhere in it, ``False`` otherwise (including for an
        empty list).
    """
    for fragment in lines:
        if re.search(r"</?mstts:", fragment) is not None:
            return True
    return False
[docs]
def add_speak_tag(
    self,
    ast: Union[ASTNode, List[ASTNode]],
    new_line: bool,
    new_line_after_end: bool,
    attr: Optional[Dict[str, Any]],
    lines: List[str],
) -> List[str]:
    """Render ``ast`` and append it to ``lines`` wrapped in a ``speak`` element.

    The content is rendered into a scratch list first so it can be scanned
    for ``mstts:`` tags; when any are present the ``xmlns:mstts`` attribute
    is added to the ``speak`` start tag.

    Args:
        ast: Node or list of nodes handed to ``processAst``.
        new_line: Forwarded to ``start_tag`` and ``end_tag``.
        new_line_after_end: When true, a ``"\\n"`` entry is appended after
            the closing tag.
        attr: Optional attributes for the ``speak`` start tag. The dict is
            never modified; the namespace attribute is added to a copy.
        lines: Output list that receives the start tag, content and end tag.

    Returns:
        The same ``lines`` list that was passed in.
    """
    content_lines: List[str] = []
    self.processAst(ast, content_lines)
    self.add_section_end_tag(content_lines)

    # Work on a copy: ``attr or {}`` would alias the caller's dict and the
    # namespace attribute below would leak back into it.
    speak_attrs: Dict[str, Any] = dict(attr) if attr else {}
    if self.contains_mstts_tag(content_lines):
        speak_attrs["xmlns:mstts"] = "https://www.w3.org/2001/mstts"

    lines.append(self.start_tag("speak", speak_attrs, new_line))
    lines.extend(content_lines)
    lines.append(self.end_tag("speak", new_line))
    if new_line_after_end:
        lines.append("\n")
    return lines
[docs]
def get_text_modifier_object(self, ast: ASTNode) -> TagsObject:
    """Build the tags object for a text span followed by modifiers.

    Children of ``ast`` are visited in order. Plain-text children set the
    object's ``text``; each ``textModifierKeyOptionalValue`` child adds one
    tag, except ``style``/``role``/named-style modifiers, which are collected
    and emitted together as a single tag after the loop.

    Args:
        ast: Node whose ``children`` hold the text and modifier nodes.

    Returns:
        The populated ``TagsObject``.
    """
    text_node_names = {
        "plainText",
        "plainTextSpecialChars",
        "plainTextEmphasis",
        "plainTextPhone",
        "plainTextModifier",
    }
    literal_say_as_keys = {"address", "fraction", "ordinal", "telephone"}
    prosody_keys = {"volume", "rate", "pitch"}
    named_styles = {
        "excited",
        "disappointed",
        "friendly",
        "cheerful",
        "sad",
        "angry",
        "fearful",
        "empathetic",
        "calm",
        "lyrical",
        "hopeful",
        "terrified",
        "shouting",
        "whispering",
        "unfriendly",
        "gentle",
        "serious",
        "depressed",
        "embarrassed",
        "disgruntled",
        "envious",
        "affectionate",
        "assistant",
        "chat",
        "customerservice",
        "poetry-reading",
        "narration-professional",
        "narration-relaxed",
        "newscast-casual",
        "newscast-formal",
        "newscaster",
        "documentary-narration",
        "advertisement_upbeat",
        "sports_commentary",
        "sports_commentary_excited",
    }

    result = TagsObject(self)
    express_as = {}

    for node in ast.children:
        if node.name in text_node_names:
            result.text = node.allText
            continue
        if node.name != "textModifierKeyOptionalValue":
            continue

        # Resolve aliases first, then look up the element for the real key.
        written_key = node.children[0].allText
        key = self.modifier_key_mappings.get(written_key, written_key)
        value = node.children[1].allText if len(node.children) == 2 else ""
        tag_name = self.modifier_key_to_ssml_tag_mappings.get(key)

        if key in literal_say_as_keys:
            result.tag(tag_name, {"interpret-as": key})
        elif key == "number":
            result.tag(tag_name, {"interpret-as": "cardinal"})
        elif key == "characters":
            # Text that parses as a float is read out as digits.
            try:
                float(result.text)
            except ValueError:
                interpret_as = "characters"
            else:
                interpret_as = "digits"
            result.tag(tag_name, {"interpret-as": interpret_as})
        elif key == "date":
            result.tag(tag_name, {"interpret-as": key, "format": value or "ymd"})
        elif key == "time":
            result.tag(tag_name, {"interpret-as": key, "format": value or "hms12"})
        elif key == "whisper":
            result.tag(tag_name, {"volume": "x-soft", "rate": "slow"})
        elif key == "ipa":
            result.tag(tag_name, {"alphabet": key, "ph": value})
        elif key == "sub":
            result.tag(tag_name, {"alias": value})
        elif key in prosody_keys:
            # NOTE(review): the meaning of the trailing ``True`` is defined
            # by ``TagsObject.tag`` -- confirm there.
            result.tag(tag_name, {key: value or "medium"}, True)
        elif key == "voice":
            result.voice_tag(value)
        elif key == "style":
            express_as["style"] = value
        elif key == "role":
            express_as["role"] = value
        elif key in named_styles:
            express_as["style"] = "newscast" if key == "newscaster" else key
            if value:
                # Keep the degree only when it is numeric and inside the
                # bounds read from the instance; otherwise drop it silently.
                try:
                    degree = float(value)
                    if self.min_style_degree <= degree <= self.max_style_degree:
                        express_as["styledegree"] = value
                except ValueError:
                    pass
        elif key == "lang":
            result.tag(tag_name, {"xml:lang": value})

    # A role on its own is not emitted; a non-empty style is required.
    if express_as.get("style"):
        express_as_tag = self.modifier_key_to_ssml_tag_mappings["excited"]
        result.tag(express_as_tag, express_as)

    return result
[docs]
def get_section_object(self, ast: ASTNode) -> TagsObject:
    """Build the tags object for a section marker.

    Only ``sectionModifierKeyOptionalValue`` children are considered.
    ``voice`` adds a voice tag, ``defaults`` adds nothing, a named style adds
    a tag with a ``style`` attribute (plus ``styledegree`` when valid), and
    ``lang`` adds a tag with ``xml:lang``. Any other key is ignored.

    Args:
        ast: Section node whose ``children`` hold the modifiers.

    Returns:
        The populated ``TagsObject``.
    """
    named_styles = {
        "excited",
        "disappointed",
        "friendly",
        "cheerful",
        "sad",
        "angry",
        "fearful",
        "empathetic",
        "calm",
        "lyrical",
        "hopeful",
        "terrified",
        "shouting",
        "whispering",
        "unfriendly",
        "gentle",
        "serious",
        "depressed",
        "embarrassed",
        "disgruntled",
        "envious",
        "affectionate",
        "assistant",
        "chat",
        "customerservice",
        "poetry-reading",
        "narration-professional",
        "narration-relaxed",
        "newscast-casual",
        "newscast-formal",
        "newscaster",
        "documentary-narration",
        "advertisement_upbeat",
        "sports_commentary",
        "sports_commentary_excited",
    }

    section = TagsObject(self)

    for node in ast.children:
        if node.name != "sectionModifierKeyOptionalValue":
            continue

        key = node.children[0].allText
        value = node.children[1].allText if len(node.children) == 2 else ""
        tag_name = self.modifier_key_to_ssml_tag_mappings.get(key)

        if key == "voice":
            section.voice_tag(value)
            continue
        if key == "defaults":
            # Adds no tag of its own.
            continue
        if key == "lang":
            section.tag(tag_name, {"xml:lang": value})
            continue
        if key not in named_styles:
            continue

        style_attrs = {"style": "newscast" if key == "newscaster" else key}
        if value:
            # Keep the degree only when it is numeric and inside the bounds
            # read from the instance; otherwise drop it silently.
            try:
                degree = float(value)
                if self.min_style_degree <= degree <= self.max_style_degree:
                    style_attrs["styledegree"] = value
            except ValueError:
                pass
        section.tag(tag_name, style_attrs)

    return section
[docs]
def formatFromAst(
    self, ast: ASTNode, lines: Optional[List[str]] = None
) -> List[str]:
    """Render one AST node (and its descendants) into SSML fragments.

    Dispatches on ``ast.name``. Node types without a dedicated branch are
    handled by recursing into their children through ``processAst``.

    Args:
        ast: Node to render. An object without a ``name`` attribute is
            ignored.
        lines: Output list to append to; a new list is created when omitted.

    Returns:
        The list holding the rendered fragments (``lines`` itself when one
        was supplied).
    """
    if lines is None:
        lines = []
    if not hasattr(ast, "name"):
        return lines

    node_name = ast.name

    # Short emphasis syntax: node name -> value of the ``level`` attribute.
    emphasis_levels = {
        "shortEmphasisModerate": "moderate",
        "shortEmphasisStrong": "strong",
        "shortEmphasisNone": "none",
        "shortEmphasisReduced": "reduced",
    }
    text_node_names = (
        "plainText",
        "plainTextSpecialChars",
        "plainTextEmphasis",
        "plainTextPhone",
        "plainTextModifier",
    )

    if node_name == "document":
        if getattr(self.options, "includeFormatterComment", False):
            self.add_comment(
                "Converted from Speech Markdown to SSML for Microsoft Azure", lines
            )
        if getattr(self.options, "includeSpeakTag", True):
            return self.add_speak_tag(ast.children, True, False, None, lines)
        self.processAst(ast.children, lines)
        return lines

    if node_name == "paragraph":
        if getattr(self.options, "includeParagraphTag", False):
            return self.add_tag("p", ast.children, True, False, None, lines)
        self.processAst(ast.children, lines)
        return lines

    if node_name == "shortBreak":
        time = ast.children[0].allText
        return self.add_tag_with_attrs(lines, None, "break", {"time": time})

    if node_name == "break":
        val = ast.children[0].allText
        # The grandchild's node type decides which attribute carries the
        # value; any other type yields a ``break`` without attributes.
        kind = ast.children[0].children[0].name
        attrs: Dict[str, Any] = {}
        if kind == "breakStrengthValue":
            attrs = {"strength": val}
        elif kind == "time":
            attrs = {"time": val}
        return self.add_tag_with_attrs(lines, None, "break", attrs)

    if node_name == "markTag":
        name = ast.children[0].allText
        # NOTE(review): the trailing ``False`` is interpreted by
        # ``add_tag_with_attrs`` -- confirm its meaning in the base class.
        return self.add_tag_with_attrs(
            lines, None, "bookmark", {"mark": name}, False
        )

    if node_name in emphasis_levels:
        text = ast.children[0].allText
        return self.add_tag_with_attrs(
            lines, text, "emphasis", {"level": emphasis_levels[node_name]}
        )

    if node_name == "textModifier":
        tmo = self.get_text_modifier_object(ast)
        return self.apply_tags_object(tmo, lines)

    if node_name == "shortIpa":
        tmo = self.get_short_ipa_object(ast)
        return self.apply_tags_object(tmo, lines)

    if node_name == "bareIpa":
        tmo = self.get_short_ipa_object(ast, "ipa")
        return self.apply_tags_object(tmo, lines)

    if node_name == "shortSub":
        tmo = self.get_short_sub_object(ast)
        return self.apply_tags_object(tmo, lines)

    if node_name == "audio":
        # With two children the URL is the second one, otherwise the first.
        index = 1 if len(ast.children) == 2 else 0
        # The URL lands in an XML attribute value, so every ampersand has
        # to be written as an entity for the output to stay well-formed.
        url = ast.children[index].allText.replace("&", "&amp;")
        return self.add_tag_with_attrs(lines, None, "audio", {"src": url}, False)

    if node_name == "simpleLine":
        self.processAst(ast.children, lines)
        return lines

    if node_name == "lineEnd":
        lines.append(ast.allText)
        return lines

    if node_name == "emptyLine":
        if getattr(self.options, "preserveEmptyLines", True):
            lines.append(ast.allText)
        return lines

    if node_name in text_node_names:
        text = (
            self.escape_xml_characters(ast.allText)
            if getattr(self.options, "escapeXmlSymbols", False)
            else ast.allText
        )
        lines.append(text)
        return lines

    if node_name == "section":
        so = self.get_section_object(ast)
        tags_sorted_asc = sorted(so.tags.keys(), key=lambda t: so.tags[t]["sortId"])
        # Close whatever section is currently open before starting this one.
        self.add_section_end_tag(lines)
        self.add_section_start_tag(tags_sorted_asc, so, lines)
        return lines

    # Unrecognized node type: render its children.
    self.processAst(ast.children, lines)
    return lines