#
# import_rosetta_stone.py <Peter.Bienstman>
#

# Script to import audio and pictures from the Rosetta Stone V2 into a Sentence
# card type.

# Adapt it to your own needs. It uses Linux external tools (convert, mplayer,
# lame), so it needs to be modified to run under Windows.

import os
import shutil
import subprocess
import sys

from mnemosyne.script import Mnemosyne

# 'data_dir = None' will use the default system location, edit as appropriate.
data_dir = None
mnemosyne = Mnemosyne(data_dir)


# Answer questions coming from libmnemosyne.

def show_question(question, option0, option1, option2):
    """Answer libmnemosyne's interactive questions without user input.

    Returns the index of the chosen option. Only the two questions this
    script expects are handled; any other question raises
    NotImplementedError so that it does not get answered blindly.
    """
    # Answer 'no' when adding duplicate cards.
    if question.startswith("There is already"):
        return 2
    # Answer 'yes' for making tag active.
    if question.startswith("Make tag"):
        return 0
    else:
        raise NotImplementedError

mnemosyne.main_widget().show_question = show_question

# This script will add tags like TRS Arabic::Unit 1::Lesson 1
tag_prefix = "TRS Arabic"

# Card type.
card_type = mnemosyne.card_type_with_id("6::Arabic MSA sentences")

# Directory containing foreign language, with directories like ARA01_01
# and PCT01_01
foreign_directory = "/home/pbienst/tmp/trs_arabic"

# Directory containing native language, to generate translations.
native_directory = "/home/pbienst/tmp/trs_english"

# Subdirectory in the media directory to use.
media_subdir = "trs_ara"
full_media_subdir = os.path.join(mnemosyne.database().media_dir(), media_subdir)
# 'exist_ok' makes reruns harmless and also creates missing parent directories.
os.makedirs(full_media_subdir, exist_ok=True)

# Codecs that were used to encode the foreign and the native language.
foreign_codec = "iso-8859-6"
native_codec = "latin-1"

# Number of sentences every lesson is expected to contain.
SENTENCES_PER_LESSON = 40

# Stray quote characters in the text files, mapped to plain ASCII quotes.
QUOTE_FIXES = str.maketrans({chr(336): "\'", chr(213): "\'",
    chr(210): "\"", chr(211): "\""})


def parse_unit_lesson(dirname):
    """Return (unit, lesson) as integers from a name like 'ARA01_01'.

    The first three characters are skipped and the rest is split on '_'.
    """
    unit, lesson = dirname[3:].split("_")
    return int(unit), int(lesson)


def run_tool(args):
    """Run an external command given as an argument list, without a shell.

    Returns True if the command exited with status 0. A command that cannot
    be started is reported on stderr and counts as a failure, so the import
    stays best-effort instead of aborting halfway.
    """
    try:
        return subprocess.call(args) == 0
    except OSError as error:
        print("Could not run %s: %s" % (args[0], error), file=sys.stderr)
        return False


# Extract txt.
def get_txt(directory, codec):
    """Read the sentences of every lesson below 'directory'.

    Every subdirectory whose name does not start with 'PCT' is treated as a
    lesson. Its first '.TXT' file is decoded with 'codec' (undecodable bytes
    are dropped) and split on '@'; the pieces before the first and after the
    last '@' are discarded.

    Returns a dict such that txt[unit][lesson] is the list of sentences.
    Raises ValueError if a lesson does not contain exactly
    SENTENCES_PER_LESSON sentences.
    """
    txt = {}
    for path in sorted(os.listdir(directory)):
        subdir = os.path.join(directory, path)
        if not os.path.isdir(subdir) or path.startswith("PCT"):
            continue
        # Determine unit and lesson number.
        unit, lesson = parse_unit_lesson(path)
        # Determine sentences. Read raw bytes so that we control the decoding.
        txt_name = [x for x in os.listdir(subdir) if x.endswith(".TXT")][0]
        with open(os.path.join(subdir, txt_name), "rb") as txt_file:
            raw = txt_file.read()
        entries = raw.decode(codec, errors="ignore") \
            .translate(QUOTE_FIXES) \
            .split("@")[1:-1]
        if len(entries) != SENTENCES_PER_LESSON:
            raise ValueError("Expected %d sentences in %s, found %d" \
                % (SENTENCES_PER_LESSON, subdir, len(entries)))
        txt.setdefault(unit, {})[lesson] = entries
    return txt

foreign_txt = get_txt(foreign_directory, foreign_codec)
native_txt = get_txt(native_directory, native_codec)


# Extract images.
def extract_images(directory):
    """Copy or convert the pictures of every lesson into the media directory.

    Every subdirectory whose name starts with 'PCT' is treated as a lesson;
    its pictures are taken from the first entry in it starting with 'P'.
    Files ending in 'JPG' are copied, files ending in 'PCT' are converted to
    JPG with the external 'convert' tool.

    Returns a dict such that images[unit][lesson] is the list of picture
    paths, relative to the media directory, in sorted file name order.
    """
    images = {}
    for path in sorted(os.listdir(directory)):
        subdir = os.path.join(directory, path)
        if not os.path.isdir(subdir) or not path.startswith("PCT"):
            continue
        # Determine unit and lesson number.
        unit, lesson = parse_unit_lesson(path)
        img_dir = os.path.join(subdir,
            [x for x in os.listdir(subdir) if x.startswith("P")][0])
        img_list = []
        for img in sorted(os.listdir(img_dir)):
            full_path = os.path.join(img_dir, img)
            if img.endswith("JPG"):
                shutil.copyfile(full_path, os.path.join(full_media_subdir, img))
                img_list.append(media_subdir + "/" + img)
            if img.endswith("PCT"):
                # Rename the file only, never the directory part of the path.
                jpg_name = img.replace("PCT", "JPG")
                run_tool(["convert", full_path,
                    os.path.join(full_media_subdir, jpg_name)])
                img_list.append(media_subdir + "/" + jpg_name)
        images.setdefault(unit, {})[lesson] = img_list
    return images

images = extract_images(foreign_directory)


# Extract sound.
def extract_sound(directory):
    """Convert the sound files of every lesson to MP3 in the media directory.

    Every subdirectory whose name does not start with 'PCT' is treated as a
    lesson; its sounds are taken from the first entry in it ending in 'S'.
    Files ending in 'SWA' are dumped to 'audiodump.wav' in the current
    working directory by mplayer and then encoded by lame.

    Returns a dict such that sound[unit][lesson] is the list of MP3 paths,
    relative to the media directory, in sorted file name order.
    """
    sound = {}
    for path in sorted(os.listdir(directory)):
        subdir = os.path.join(directory, path)
        if not os.path.isdir(subdir) or path.startswith("PCT"):
            continue
        # Determine unit and lesson number.
        unit, lesson = parse_unit_lesson(path)
        snd_dir = os.path.join(subdir,
            [x for x in os.listdir(subdir) if x.endswith("S")][0])
        snd_list = []
        for snd in sorted(os.listdir(snd_dir)):
            full_path = os.path.join(snd_dir, snd)
            if snd.endswith("SWA"):
                # Rename the file only, never the directory part of the path.
                mp3_name = snd.replace("SWA", "MP3")
                # Only encode if decoding worked, like 'mplayer ... && lame ...'.
                # For a high bitrate version (not really needed), pass
                # '-h --resample 44.1 -b 128' to lame.
                if run_tool(["mplayer", "-vo", "null", "-vc", "dummy",
                        "-af", "resample=44100", "-ao", "pcm:waveheader",
                        full_path]):
                    run_tool(["lame", "audiodump.wav",
                        os.path.join(full_media_subdir, mp3_name)])
                snd_list.append(media_subdir + "/" + mp3_name)
        sound.setdefault(unit, {})[lesson] = snd_list
    return sound

sound = extract_sound(foreign_directory)

# Create one card per sentence, tagged with its unit and lesson.
for unit in foreign_txt:
    for lesson in foreign_txt[unit]:
        print(("unit", unit, "lesson", lesson))
        for i in range(SENTENCES_PER_LESSON):
            print(foreign_txt[unit][lesson][i])
            print(native_txt[unit][lesson][i].replace(chr(336), "\'"))
            print(images[unit][lesson][i])
            print(sound[unit][lesson][i])
            print()
            fact_data = {"f": "[" + foreign_txt[unit][lesson][i] + "]",
                "p_1": "<audio src=\"" + sound[unit][lesson][i] + "\">",
                "m_1": native_txt[unit][lesson][i] + \
                    "\n<img src=\"" + images[unit][lesson][i] + "\">"}
            mnemosyne.controller().create_new_cards(fact_data,
                card_type, grade=-1, tag_names=[tag_prefix + "::Unit " + str(unit)\
                + "::Lesson " + str(lesson)])
        print()

mnemosyne.finalise()