{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Download and preprocess the GRID corpus data\n",
    "\n",
    "Fetches the alignment archives and the video archives from `http://spandh.dcs.shef.ac.uk/gridcorpus/` with the helpers in `download_data`, then runs `preprocess_data.preprocess` over the downloaded videos.\n",
    "\n",
    "Run the cells top to bottom on a fresh kernel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "from download_data import multi_p_run, put_worker, test_worker, download_mp4, download_align\n",
    "from preprocess_data import preprocess, find_files, Video"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of items to process; the download helpers are called with the index range 0 .. tot_movies - 1.\n",
    "tot_movies = 35\n",
    "\n",
    "# All paths are relative to the directory the notebook is run from.\n",
    "align_path = '../data/align'   # alignment archives are downloaded and unpacked here\n",
    "src_path = '../data/mp4s'      # video archives are downloaded and unpacked here\n",
    "tgt_path = '../data/datasets'  # target directory handed to preprocess()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TEST\n",
    "\n",
    "Smoke test of the multiprocessing helper: `multi_p_run` is asked to spread `tot_movies` items over 5 processes using `test_worker`.\n",
    "\n",
    "In the recorded run with `tot_movies=35` it printed the index ranges `[[0, 7], [7, 14], [14, 21], [21, 28], [28, 35]]` and returned one `{'succ': ..., 'fail': ...}` dict of index sets per process, with every `fail` set empty."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "res = multi_p_run(tot_movies, put_worker, test_worker, params={}, n_process=5)\n",
    "print(res)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Aligns\n",
    "\n",
    "For each index `i`, `download_align` prints (and presumably runs) `cd <align_path> && wget .../s{i}/align/s{i}.tar && tar -xvf s{i}.tar`.\n",
    "\n",
    "In the recorded run the returned value was a pair of index sets: all 35 indices in the first set and an empty second set."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The download command starts with `cd align_path`, so the directory must exist first.\n",
    "os.makedirs(align_path, exist_ok=True)\n",
    "\n",
    "res = download_align(0, tot_movies, {'align_path':align_path})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(res)\n",
    "# Remove the downloaded .tar archives and any Thumbs.db file in align_path.\n",
    "# The displayed value is the shell exit status (0 means both rm commands succeeded).\n",
    "os.system('rm -f {align_path}/*.tar && rm -f {align_path}/Thumbs.db'.format(align_path=align_path))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Movies (MP4s)\n",
    "\n",
    "For each index `i`, `download_mp4` prints (and presumably runs) `cd <src_path> && curl .../s{i}/video/s{i}.mpg_vcd.zip --output s{i}.mpg_vcd.zip && unzip s{i}.mpg_vcd.zip`.\n",
    "\n",
    "In the recorded run the returned value was again a pair of index sets: all 35 indices in the first set and an empty second set."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same precaution as for align_path: the download command starts with `cd src_path`.\n",
    "os.makedirs(src_path, exist_ok=True)\n",
    "\n",
    "res = download_mp4(0, tot_movies, {'src_path':src_path})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(res)\n",
    "# Remove the downloaded .zip archives and any Thumbs.db one directory level below src_path.\n",
    "# The displayed value is the shell exit status (0 means both rm commands succeeded).\n",
    "os.system('rm -f {src_path}/*.zip && rm -f {src_path}/*/Thumbs.db'.format(src_path=src_path))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Preprocess Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Delete any previous preprocessing output so the run starts from a clean slate.\n",
    "# tgt_path is intentionally left absent afterwards; presumably preprocess() creates it - TODO confirm.\n",
    "os.system('rm -rf {tgt_path}'.format(tgt_path=tgt_path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "res = preprocess(0, tot_movies, {'src_path':src_path, 'tgt_path':tgt_path})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(res)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}