Source code for speechmarkdown.formatters.microsoft_azure

import re
from typing import Any, Dict, List, Optional, Union

from speechmarkdown.formatters.data.microsoft_azure_voices import (
    MICROSOFT_AZURE_ALL_VOICES,
)
from speechmarkdown.formatters.ssml_base import SsmlFormatterBase, TagsObject
from speechmarkdown.options import SpeechOptions
from speechmarkdown.parser import ASTNode


class MicrosoftAzureSsmlFormatter(SsmlFormatterBase):
    """Format a Speech Markdown AST as SSML for Microsoft Azure.

    On top of the base formatter this class registers the Azure-specific
    modifier-to-tag mappings, emits ``mstts:express-as`` for speaking styles
    and roles, and declares the ``xmlns:mstts`` namespace on ``<speak>``
    whenever the rendered content contains an ``mstts:`` tag.
    """

    # SSML element used for every speaking style and for ``role``.
    _EXPRESS_AS_TAG = "mstts:express-as"

    # Modifier keys that are shorthand for ``style="<key>"``.  Kept as a tuple
    # (not a set) because the order is reused when registering the keys in
    # ``modifier_key_to_ssml_tag_mappings``.
    _EXPRESS_AS_STYLES = (
        "newscaster",
        "excited",
        "disappointed",
        "friendly",
        "cheerful",
        "sad",
        "angry",
        "fearful",
        "empathetic",
        "calm",
        "lyrical",
        "hopeful",
        "terrified",
        "shouting",
        "whispering",
        "unfriendly",
        "gentle",
        "serious",
        "depressed",
        "embarrassed",
        "disgruntled",
        "envious",
        "affectionate",
        "assistant",
        "chat",
        "customerservice",
        "poetry-reading",
        "narration-professional",
        "narration-relaxed",
        "newscast-casual",
        "newscast-formal",
        "documentary-narration",
        "advertisement_upbeat",
        "sports_commentary",
        "sports_commentary_excited",
    )

    # AST node names that carry literal text.
    _PLAIN_TEXT_NODES = (
        "plainText",
        "plainTextSpecialChars",
        "plainTextEmphasis",
        "plainTextPhone",
        "plainTextModifier",
    )

    # Short-emphasis AST node name -> value of the ``level`` attribute.
    _SHORT_EMPHASIS_LEVELS: Dict[str, str] = {
        "shortEmphasisModerate": "moderate",
        "shortEmphasisStrong": "strong",
        "shortEmphasisNone": "none",
        "shortEmphasisReduced": "reduced",
    }

    # Matches an opening or closing tag in the ``mstts`` namespace.  Compiled
    # once here instead of on every ``contains_mstts_tag`` call.
    _MSTTS_TAG_PATTERN = re.compile(r"</?mstts:")

    def __init__(self, options: SpeechOptions) -> None:
        """Initialise the formatter and register the Azure tag mappings.

        Args:
            options: Speech options forwarded to the base formatter.
        """
        super().__init__(options)

        # NOTE(review): ``valid_voices`` and ``valid_roles`` are not read in
        # this class; presumably the base class consumes them - confirm.
        self.valid_voices = MICROSOFT_AZURE_ALL_VOICES

        # Inclusive bounds accepted for the ``styledegree`` attribute.
        self.min_style_degree = 0.01
        self.max_style_degree = 2.0

        self.valid_roles = [
            "Girl",
            "Boy",
            "YoungAdultFemale",
            "YoungAdultMale",
            "OlderAdultFemale",
            "OlderAdultMale",
            "SeniorFemale",
            "SeniorMale",
        ]

        # Modifier key -> SSML element; ``None`` marks keys with no element.
        self.modifier_key_to_ssml_tag_mappings.update(
            {
                "emphasis": "emphasis",
                "address": "say-as",
                "number": "say-as",
                "characters": "say-as",
                "expletive": None,
                "fraction": "say-as",
                "interjection": None,
                "ordinal": "say-as",
                "telephone": "say-as",
                "unit": None,
                "time": "say-as",
                "date": "say-as",
                "sub": "sub",
                "ipa": "phoneme",
                "rate": "prosody",
                "pitch": "prosody",
                "volume": "prosody",
                "whisper": "prosody",
                "voice": "voice",
                "lang": "lang",
                "style": self._EXPRESS_AS_TAG,
                "role": self._EXPRESS_AS_TAG,
            }
        )
        # Every style shorthand maps to the same element.
        self.modifier_key_to_ssml_tag_mappings.update(
            dict.fromkeys(self._EXPRESS_AS_STYLES, self._EXPRESS_AS_TAG)
        )

        self.ssml_tag_sort_order = [
            "emphasis",
            "mstts:express-as",
            "say-as",
            "prosody",
            "voice",
            "lang",
            "sub",
            "phoneme",
        ]

    def get_voice_tag_fallback(self, name: str) -> Optional[Dict[str, Any]]:
        """Return ``<voice>`` attributes for a voice name, or ``None``.

        Args:
            name: Voice name as written in the markdown.

        Returns:
            ``None`` when ``name`` is ``"device"`` (compared
            case-insensitively), otherwise ``{"name": name}`` with the name
            passed through unchanged.
        """
        if name.lower() == "device":
            return None
        return {"name": name}

    def contains_mstts_tag(self, lines: List[str]) -> bool:
        """Report whether any rendered line contains an ``mstts:`` tag.

        Args:
            lines: Rendered SSML fragments.

        Returns:
            ``True`` if at least one fragment contains ``<mstts:`` or
            ``</mstts:``; ``False`` otherwise (including for an empty list).
        """
        return any(self._MSTTS_TAG_PATTERN.search(line) for line in lines)

    def add_speak_tag(
        self,
        ast: Union[ASTNode, List[ASTNode]],
        new_line: bool,
        new_line_after_end: bool,
        attr: Optional[Dict[str, Any]],
        lines: List[str],
    ) -> List[str]:
        """Render ``ast`` wrapped in a ``<speak>`` element and append it.

        The content is rendered into a scratch list first, because the
        attributes of the opening tag depend on it: the ``xmlns:mstts``
        namespace is declared only when an ``mstts:`` tag was produced.

        Args:
            ast: Node or nodes to render inside ``<speak>``.
            new_line: Forwarded to ``start_tag`` and ``end_tag``.
            new_line_after_end: When true, a ``"\\n"`` entry is appended after
                the closing tag.
            attr: Extra attributes for ``<speak>``; may be ``None``.  The dict
                is not modified.
            lines: Output list, extended in place.

        Returns:
            The same ``lines`` list.
        """
        content_lines: List[str] = []
        self.processAst(ast, content_lines)
        self.add_section_end_tag(content_lines)

        # Copy so that adding the namespace never leaks into the caller's
        # dict (the previous ``attr or {}`` aliased a non-empty ``attr``).
        speak_attrs: Dict[str, Any] = dict(attr) if attr else {}
        if self.contains_mstts_tag(content_lines):
            speak_attrs["xmlns:mstts"] = "https://www.w3.org/2001/mstts"

        lines.append(self.start_tag("speak", speak_attrs, new_line))
        lines.extend(content_lines)
        lines.append(self.end_tag("speak", new_line))
        if new_line_after_end:
            lines.append("\n")
        return lines

    def _is_valid_style_degree(self, value: str) -> bool:
        """Return whether ``value`` parses as a float within the bounds."""
        try:
            degree = float(value)
        except ValueError:
            # Non-numeric degrees are ignored rather than reported.
            return False
        return self.min_style_degree <= degree <= self.max_style_degree

    def _express_as_attrs(self, key: str, value: str) -> Dict[str, str]:
        """Build ``mstts:express-as`` attributes for a style shorthand key."""
        # ``newscaster`` is the only key whose style name differs from it.
        attrs = {"style": "newscast" if key == "newscaster" else key}
        if value and self._is_valid_style_degree(value):
            # The original text is emitted, not the parsed float.
            attrs["styledegree"] = value
        return attrs

    def get_text_modifier_object(self, ast: ASTNode) -> TagsObject:
        """Build the tags object for a ``(text)[modifiers]`` node.

        ``style``, ``role`` and the style shorthands are collected across all
        modifiers and emitted as a single ``mstts:express-as`` tag, and only
        when a style ended up being set.

        Args:
            ast: The text-modifier node.

        Returns:
            A ``TagsObject`` holding the text and the tags to wrap it in.
        """
        tmo = TagsObject(self)
        express_as_attrs: Dict[str, str] = {}

        for child in ast.children:
            if child.name in self._PLAIN_TEXT_NODES:
                tmo.text = child.allText
                continue
            if child.name != "textModifierKeyOptionalValue":
                continue

            key = child.children[0].allText
            # Resolve aliases to their canonical modifier key.
            key = self.modifier_key_mappings.get(key, key)
            value = child.children[1].allText if len(child.children) == 2 else ""
            ssml_tag = self.modifier_key_to_ssml_tag_mappings.get(key)

            if key in ("address", "fraction", "ordinal", "telephone"):
                tmo.tag(ssml_tag, {"interpret-as": key})
            elif key == "number":
                tmo.tag(ssml_tag, {"interpret-as": "cardinal"})
            elif key == "characters":
                # Text that parses as a number is read as digits.
                try:
                    float(tmo.text)
                    attr_value = "digits"
                except ValueError:
                    attr_value = "characters"
                tmo.tag(ssml_tag, {"interpret-as": attr_value})
            elif key == "date":
                tmo.tag(ssml_tag, {"interpret-as": key, "format": value or "ymd"})
            elif key == "time":
                tmo.tag(ssml_tag, {"interpret-as": key, "format": value or "hms12"})
            elif key == "whisper":
                tmo.tag(ssml_tag, {"volume": "x-soft", "rate": "slow"})
            elif key == "ipa":
                tmo.tag(ssml_tag, {"alphabet": key, "ph": value})
            elif key == "sub":
                tmo.tag(ssml_tag, {"alias": value})
            elif key in ("volume", "rate", "pitch"):
                tmo.tag(ssml_tag, {key: value or "medium"}, True)
            elif key == "voice":
                tmo.voice_tag(value)
            elif key == "style":
                express_as_attrs["style"] = value
            elif key == "role":
                express_as_attrs["role"] = value
            elif key in self._EXPRESS_AS_STYLES:
                express_as_attrs.update(self._express_as_attrs(key, value))
            elif key == "lang":
                tmo.tag(ssml_tag, {"xml:lang": value})

        # A role on its own is not emitted; a style is required.
        if express_as_attrs.get("style"):
            tmo.tag(
                self.modifier_key_to_ssml_tag_mappings["style"], express_as_attrs
            )
        return tmo

    def get_section_object(self, ast: ASTNode) -> TagsObject:
        """Build the tags object for a section node.

        Handles ``voice``, ``lang`` and the style shorthands; ``defaults`` and
        any other key produce no tag.

        Args:
            ast: The section node.

        Returns:
            A ``TagsObject`` holding the tags that open the section.
        """
        so = TagsObject(self)

        for child in ast.children:
            if child.name != "sectionModifierKeyOptionalValue":
                continue

            key = child.children[0].allText
            value = child.children[1].allText if len(child.children) == 2 else ""
            ssml_tag = self.modifier_key_to_ssml_tag_mappings.get(key)

            if key == "voice":
                so.voice_tag(value)
            elif key == "defaults":
                pass
            elif key in self._EXPRESS_AS_STYLES:
                so.tag(ssml_tag, self._express_as_attrs(key, value))
            elif key == "lang":
                so.tag(ssml_tag, {"xml:lang": value})
        return so

    def formatFromAst(
        self, ast: ASTNode, lines: Optional[List[str]] = None
    ) -> List[str]:
        """Render one AST node, and its children where relevant, into ``lines``.

        Args:
            ast: Node to render.  An object without a ``name`` attribute is
                ignored.
            lines: Output list; a new list is created when ``None``.

        Returns:
            The list the output was written to.
        """
        if lines is None:
            lines = []
        if not hasattr(ast, "name"):
            return lines

        node = ast.name

        if node == "document":
            if getattr(self.options, "includeFormatterComment", False):
                self.add_comment(
                    "Converted from Speech Markdown to SSML for Microsoft Azure",
                    lines,
                )
            if getattr(self.options, "includeSpeakTag", True):
                return self.add_speak_tag(ast.children, True, False, None, lines)
            self.processAst(ast.children, lines)
            return lines

        if node == "paragraph":
            if getattr(self.options, "includeParagraphTag", False):
                return self.add_tag("p", ast.children, True, False, None, lines)
            self.processAst(ast.children, lines)
            return lines

        if node == "shortBreak":
            time = ast.children[0].allText
            return self.add_tag_with_attrs(lines, None, "break", {"time": time})

        if node == "break":
            val = ast.children[0].allText
            attrs: Dict[str, Any] = {}
            # The grandchild node name says whether this is a strength or a
            # duration; anything else yields a bare <break>.
            break_kind = ast.children[0].children[0].name
            if break_kind == "breakStrengthValue":
                attrs = {"strength": val}
            elif break_kind == "time":
                attrs = {"time": val}
            return self.add_tag_with_attrs(lines, None, "break", attrs)

        if node == "markTag":
            name = ast.children[0].allText
            return self.add_tag_with_attrs(
                lines, None, "bookmark", {"mark": name}, False
            )

        if node in self._SHORT_EMPHASIS_LEVELS:
            text = ast.children[0].allText
            return self.add_tag_with_attrs(
                lines, text, "emphasis", {"level": self._SHORT_EMPHASIS_LEVELS[node]}
            )

        if node == "textModifier":
            tmo = self.get_text_modifier_object(ast)
            return self.apply_tags_object(tmo, lines)

        if node == "shortIpa":
            tmo = self.get_short_ipa_object(ast)
            return self.apply_tags_object(tmo, lines)

        if node == "bareIpa":
            tmo = self.get_short_ipa_object(ast, "ipa")
            return self.apply_tags_object(tmo, lines)

        if node == "shortSub":
            tmo = self.get_short_sub_object(ast)
            return self.apply_tags_object(tmo, lines)

        if node == "audio":
            # With two children the URL is the second one.
            index = 1 if len(ast.children) == 2 else 0
            url = ast.children[index].allText.replace("&", "&amp;")
            return self.add_tag_with_attrs(lines, None, "audio", {"src": url}, False)

        if node == "simpleLine":
            self.processAst(ast.children, lines)
            return lines

        if node == "lineEnd":
            lines.append(ast.allText)
            return lines

        if node == "emptyLine":
            if getattr(self.options, "preserveEmptyLines", True):
                lines.append(ast.allText)
            return lines

        if node in self._PLAIN_TEXT_NODES:
            text = (
                self.escape_xml_characters(ast.allText)
                if getattr(self.options, "escapeXmlSymbols", False)
                else ast.allText
            )
            lines.append(text)
            return lines

        if node == "section":
            so = self.get_section_object(ast)
            tags_sorted_asc = sorted(
                so.tags.keys(), key=lambda t: so.tags[t]["sortId"]
            )
            # End the previous section's tags before starting this one's.
            self.add_section_end_tag(lines)
            self.add_section_start_tag(tags_sorted_asc, so, lines)
            return lines

        # Unknown node types are transparent: render their children.
        self.processAst(ast.children, lines)
        return lines