1#
2# import_rosetta_stone.py <Peter.Bienstman>
3#
4
5# Script to import audio and pictures from the Rosetta Stone V2 into a Sentence
6# card type.
7
# Adapt it to your own needs. It uses external Linux tools, so it needs to be
# modified to run under Windows.
10
11import os
12import shutil
13
14from mnemosyne.script import Mnemosyne
15
# 'data_dir = None' will use the default system location, edit as appropriate.
data_dir = None
mnemosyne = Mnemosyne(data_dir)
19
20# Answer questions coming from libmnemosyne.
21
def show_question(question, option0, option1, option2):
    """Answer a libmnemosyne question without user interaction.

    Returns the index of the option to pick: 2 when `question` starts with
    "There is already", 0 when it starts with "Make tag". Any other question
    raises NotImplementedError. The option texts themselves are not inspected.
    """
    # Question prefix -> index of the option to choose.
    canned_answers = (
        ("There is already", 2),  # 'no' when adding duplicate cards.
        ("Make tag", 0),          # 'yes' for making the tag active.
    )
    for prefix, answer in canned_answers:
        if question.startswith(prefix):
            return answer
    raise NotImplementedError
31
# Let the main widget answer questions through show_question instead of
# prompting the user.
mnemosyne.main_widget().show_question = show_question

# This script will add tags like TRS Arabic::Unit 1::Lesson 1
tag_prefix = "TRS Arabic"

# Card type.
card_type = mnemosyne.card_type_with_id("6::Arabic MSA sentences")

# Directory containing foreign language, with directories like ARA01_01
# and PCT01_01
foreign_directory = "/home/pbienst/tmp/trs_arabic"

# Directory containing native language, to generate translations.
native_directory = "/home/pbienst/tmp/trs_english"

# Subdirectory in the media directory to use.
media_subdir = "trs_ara"
full_media_subdir = os.path.join(mnemosyne.database().media_dir(), media_subdir)
# Create the directory in a single call: no window between checking and
# creating, and missing parent directories are created as well.
os.makedirs(full_media_subdir, exist_ok=True)

# Codecs that were used to encode the foreign and the native language text.
foreign_codec = "iso-8859-6"
native_codec = "latin-1"
56
57# Extract txt.
def get_txt(directory, codec):
    """Collect the sentences of every lesson found in `directory`.

    Every subdirectory whose name does not start with "PCT" is expected to be
    named like ARA01_02 (three characters, unit number, underscore, lesson
    number) and to contain a file whose name ends in ".TXT". That file is
    decoded with `codec` (bytes that cannot be decoded are dropped), a few
    quote characters are normalised to ' and ", and the text is split on "@",
    discarding the first and the last piece.

    Returns a dict {unit: {lesson: [sentence, ...]}} with integer keys.
    Raises AssertionError if a lesson does not yield exactly 40 sentences and
    IndexError if a lesson directory has no ".TXT" file.
    """
    # Single-pass replacement of the typographic quotes by plain ones.
    quote_table = {336: "'", 213: "'", 210: "\"", 211: "\""}
    txt = {}
    for path in sorted(os.listdir(directory)):
        subdir = os.path.join(directory, path)
        if not os.path.isdir(subdir) or path.startswith("PCT"):
            continue
        # Determine unit and lesson number.
        unit, lesson = (int(number) for number in path[3:].split("_"))
        # Determine sentences.
        txt_name = [x for x in os.listdir(subdir) if x.endswith(".TXT")][0]
        # Read raw bytes so the text can be decoded with the requested codec,
        # and make sure the file is closed again.
        with open(os.path.join(subdir, txt_name), "rb") as txt_file:
            raw = txt_file.read()
        entries = raw.decode(codec, errors="ignore") \
            .translate(quote_table) \
            .split("@")[1:-1]
        assert len(entries) == 40, \
            "expected 40 sentences in %s, found %d" % (subdir, len(entries))
        txt.setdefault(unit, {})[lesson] = entries
    return txt
81
# Sentences per unit and lesson, for the foreign and for the native language.
foreign_txt = get_txt(foreign_directory, foreign_codec)
native_txt = get_txt(native_directory, native_codec)
84
85# Extract images.
def extract_images(directory):
    """Copy or convert the pictures of every lesson into the media directory.

    Every subdirectory of `directory` whose name starts with "PCT" is expected
    to be named like PCT01_02 (unit number, underscore, lesson number) and to
    contain an entry starting with "P" that holds the pictures. Files ending
    in "JPG" are copied to `full_media_subdir`; files ending in "PCT" are
    converted to JPG there with the external 'convert' tool.

    Returns a dict {unit: {lesson: [path, ...]}} with integer keys, where
    each path is `media_subdir` + "/" + the file name, in sorted file order.
    """
    # Local import: only the external 'convert' call needs it.
    import subprocess

    images = {}
    for path in sorted(os.listdir(directory)):
        subdir = os.path.join(directory, path)
        if not (os.path.isdir(subdir) and path.startswith("PCT")):
            continue
        # Determine unit and lesson number.
        unit, lesson = (int(number) for number in path[3:].split("_"))
        img_dir = os.path.join(subdir,
            [x for x in os.listdir(subdir) if x.startswith("P")][0])
        img_list = []
        for img in sorted(os.listdir(img_dir)):
            full_path = os.path.join(img_dir, img)
            if img.endswith("JPG"):
                shutil.copyfile(full_path, os.path.join(full_media_subdir, img))
                img_list.append(media_subdir + "/" + img)
            if img.endswith("PCT"):
                # Rename the file only, so a media directory whose path
                # happens to contain "PCT" is left untouched.
                jpg_name = img.replace("PCT", "JPG")
                try:
                    # An argument list avoids the shell, so paths containing
                    # spaces or shell metacharacters are passed on intact.
                    subprocess.call(["convert", full_path,
                        os.path.join(full_media_subdir, jpg_name)])
                except OSError as error:
                    # Stay best-effort when the tool cannot be started, as
                    # the former os.system() call did.
                    print("Could not run 'convert' on %s: %s" \
                        % (full_path, error))
                img_list.append(media_subdir + "/" + jpg_name)
        images.setdefault(unit, {})[lesson] = img_list
    return images
112
# Image paths (relative to the media directory) per unit and lesson.
images = extract_images(foreign_directory)
114
115# Extract sound.
def extract_sound(directory):
    """Convert the audio of every lesson to MP3 files in the media directory.

    Every subdirectory of `directory` whose name does not start with "PCT" is
    expected to be named like ARA01_02 (three characters, unit number,
    underscore, lesson number) and to contain an entry ending in "S" that
    holds the sound files. Each file ending in "SWA" is decoded with the
    external 'mplayer' tool and encoded with 'lame' into `full_media_subdir`.

    Returns a dict {unit: {lesson: [path, ...]}} with integer keys, where
    each path is `media_subdir` + "/" + the MP3 file name, in sorted file
    order.
    """
    # Local import: only the external tool calls need it.
    import subprocess

    sound = {}
    for path in sorted(os.listdir(directory)):
        subdir = os.path.join(directory, path)
        if not os.path.isdir(subdir) or path.startswith("PCT"):
            continue
        # Determine unit and lesson number.
        unit, lesson = (int(number) for number in path[3:].split("_"))
        snd_dir = os.path.join(subdir,
            [x for x in os.listdir(subdir) if x.endswith("S")][0])
        snd_list = []
        for snd in sorted(os.listdir(snd_dir)):
            if not snd.endswith("SWA"):
                continue
            full_path = os.path.join(snd_dir, snd)
            # Rename the file only, so a media directory whose path happens
            # to contain "SWA" is left untouched.
            mp3_name = snd.replace("SWA", "MP3")
            try:
                # Argument lists avoid the shell, so paths containing spaces
                # or shell metacharacters are passed on intact.
                decoded = subprocess.call(["mplayer", "-vo", "null",
                    "-vc", "dummy", "-af", "resample=44100",
                    "-ao", "pcm:waveheader", full_path])
                # Only encode when decoding succeeded (the former '&&').
                # lame reads audiodump.wav from the current directory,
                # presumably mplayer's default pcm output file.
                # For a high bitrate version (not really needed), pass
                # "-h", "--resample", "44.1", "-b", "128" to lame.
                if decoded == 0:
                    subprocess.call(["lame", "audiodump.wav",
                        os.path.join(full_media_subdir, mp3_name)])
            except OSError as error:
                # Stay best-effort when a tool cannot be started, as the
                # former os.system() call did.
                print("Could not convert %s: %s" % (full_path, error))
            snd_list.append(media_subdir + "/" + mp3_name)
        sound.setdefault(unit, {})[lesson] = snd_list
    return sound
144
# Sound paths (relative to the media directory) per unit and lesson.
sound = extract_sound(foreign_directory)

# Create one card for each of the 40 sentences of every lesson, combining the
# foreign sentence, its audio, the native translation and the picture.
for unit, lessons in foreign_txt.items():
    for lesson in lessons:
        print(("unit", unit, "lesson", lesson))
        lesson_tag = tag_prefix + "::Unit " + str(unit) \
            + "::Lesson " + str(lesson)
        for idx in range(40):
            # Look up and echo the four parts one by one, for progress
            # feedback on the console.
            sentence = foreign_txt[unit][lesson][idx]
            print(sentence)
            translation = native_txt[unit][lesson][idx]
            print(translation.replace(chr(336), "\'"))
            picture = images[unit][lesson][idx]
            print(picture)
            audio = sound[unit][lesson][idx]
            print(audio)
            print()
            fact_data = {
                "f": "[" + sentence + "]",
                "p_1": "<audio src=\"" + audio + "\">",
                "m_1": translation + "\n<img src=\"" + picture + "\">",
            }
            mnemosyne.controller().create_new_cards(fact_data, card_type,
                grade=-1, tag_names=[lesson_tag])
        print()

mnemosyne.finalise()
166