1{
2 "cells": [
3  {
4   "cell_type": "code",
5   "execution_count": 1,
6   "metadata": {
7    "scrolled": true
8   },
9   "outputs": [],
10   "source": [
11    "from download_data import multi_p_run, put_worker, test_worker, download_mp4, download_align"
12   ]
13  },
14  {
15   "cell_type": "code",
16   "execution_count": 2,
17   "metadata": {},
18   "outputs": [],
19   "source": [
20    "import os"
21   ]
22  },
23  {
24   "cell_type": "code",
25   "execution_count": 3,
26   "metadata": {},
27   "outputs": [],
28   "source": [
29    "tot_movies=35"
30   ]
31  },
32  {
33   "cell_type": "code",
34   "execution_count": null,
35   "metadata": {},
36   "outputs": [],
37   "source": []
38  },
39  {
40   "cell_type": "markdown",
41   "metadata": {},
42   "source": [
43    "## TEST"
44   ]
45  },
46  {
47   "cell_type": "code",
48   "execution_count": 4,
49   "metadata": {},
50   "outputs": [
51    {
52     "name": "stdout",
53     "output_type": "stream",
54     "text": [
55      "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34]\n",
56      "5\n",
57      "35 >> [[0, 7], [7, 14], [14, 21], [21, 28], [28, 35]]\n",
58      "[{'succ': {0, 1, 2, 3, 4, 5, 6}, 'fail': set()}, {'succ': {7, 8, 9, 10, 11, 12, 13}, 'fail': set()}, {'succ': {14, 15, 16, 17, 18, 19, 20}, 'fail': set()}, {'succ': {21, 22, 23, 24, 25, 26, 27}, 'fail': set()}, {'succ': {32, 33, 34, 28, 29, 30, 31}, 'fail': set()}]\n"
59     ]
60    }
61   ],
62   "source": [
63    "res = multi_p_run(tot_movies, put_worker, test_worker, params={}, n_process=5)\n",
64    "print (res)"
65   ]
66  },
67  {
68   "cell_type": "markdown",
69   "metadata": {},
70   "source": [
71    "## Download Data"
72   ]
73  },
74  {
75   "cell_type": "markdown",
76   "metadata": {},
77   "source": [
78    "### Aligns"
79   ]
80  },
81  {
82   "cell_type": "code",
83   "execution_count": 5,
84   "metadata": {},
85   "outputs": [
86    {
87     "name": "stdout",
88     "output_type": "stream",
89     "text": [
90      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s0/align/s0.tar && tar -xvf s0.tar\n",
91      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s1/align/s1.tar && tar -xvf s1.tar\n",
92      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s2/align/s2.tar && tar -xvf s2.tar\n",
93      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s3/align/s3.tar && tar -xvf s3.tar\n",
94      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s4/align/s4.tar && tar -xvf s4.tar\n",
95      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s5/align/s5.tar && tar -xvf s5.tar\n",
96      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s6/align/s6.tar && tar -xvf s6.tar\n",
97      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s7/align/s7.tar && tar -xvf s7.tar\n",
98      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s8/align/s8.tar && tar -xvf s8.tar\n",
99      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s9/align/s9.tar && tar -xvf s9.tar\n",
100      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s10/align/s10.tar && tar -xvf s10.tar\n",
101      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s11/align/s11.tar && tar -xvf s11.tar\n",
102      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s12/align/s12.tar && tar -xvf s12.tar\n",
103      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s13/align/s13.tar && tar -xvf s13.tar\n",
104      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s14/align/s14.tar && tar -xvf s14.tar\n",
105      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s15/align/s15.tar && tar -xvf s15.tar\n",
106      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s16/align/s16.tar && tar -xvf s16.tar\n",
107      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s17/align/s17.tar && tar -xvf s17.tar\n",
108      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s18/align/s18.tar && tar -xvf s18.tar\n",
109      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s19/align/s19.tar && tar -xvf s19.tar\n",
110      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s20/align/s20.tar && tar -xvf s20.tar\n",
111      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s21/align/s21.tar && tar -xvf s21.tar\n",
112      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s22/align/s22.tar && tar -xvf s22.tar\n",
113      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s23/align/s23.tar && tar -xvf s23.tar\n",
114      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s24/align/s24.tar && tar -xvf s24.tar\n",
115      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s25/align/s25.tar && tar -xvf s25.tar\n",
116      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s26/align/s26.tar && tar -xvf s26.tar\n",
117      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s27/align/s27.tar && tar -xvf s27.tar\n",
118      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s28/align/s28.tar && tar -xvf s28.tar\n",
119      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s29/align/s29.tar && tar -xvf s29.tar\n",
120      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s30/align/s30.tar && tar -xvf s30.tar\n",
121      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s31/align/s31.tar && tar -xvf s31.tar\n",
122      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s32/align/s32.tar && tar -xvf s32.tar\n",
123      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s33/align/s33.tar && tar -xvf s33.tar\n",
124      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s34/align/s34.tar && tar -xvf s34.tar\n"
125     ]
126    }
127   ],
128   "source": [
129    "align_path = '../data/align'\n",
130    "os.makedirs(align_path, exist_ok=True)\n",
131    "\n",
132    "res = download_align(0, tot_movies, {'align_path':align_path})"
133   ]
134  },
135  {
136   "cell_type": "code",
137   "execution_count": 6,
138   "metadata": {},
139   "outputs": [
140    {
141     "name": "stdout",
142     "output_type": "stream",
143     "text": [
144      "({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34}, set())\n"
145     ]
146    },
147    {
148     "data": {
149      "text/plain": [
150       "0"
151      ]
152     },
153     "execution_count": 6,
154     "metadata": {},
155     "output_type": "execute_result"
156    }
157   ],
158   "source": [
159    "print (res)\n",
160    "os.system('rm -f {align_path}/*.tar && rm -f {align_path}/Thumbs.db'.format(align_path=align_path))"
161   ]
162  },
163  {
164   "cell_type": "code",
165   "execution_count": 7,
166   "metadata": {},
167   "outputs": [],
168   "source": [
169    "### Moives(MP4s)"
170   ]
171  },
172  {
173   "cell_type": "code",
174   "execution_count": 8,
175   "metadata": {},
176   "outputs": [
177    {
178     "name": "stdout",
179     "output_type": "stream",
180     "text": [
181      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s0/video/s0.mpg_vcd.zip --output s0.mpg_vcd.zip && unzip s0.mpg_vcd.zip\n",
182      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s1/video/s1.mpg_vcd.zip --output s1.mpg_vcd.zip && unzip s1.mpg_vcd.zip\n",
183      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s2/video/s2.mpg_vcd.zip --output s2.mpg_vcd.zip && unzip s2.mpg_vcd.zip\n",
184      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s3/video/s3.mpg_vcd.zip --output s3.mpg_vcd.zip && unzip s3.mpg_vcd.zip\n",
185      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s4/video/s4.mpg_vcd.zip --output s4.mpg_vcd.zip && unzip s4.mpg_vcd.zip\n",
186      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s5/video/s5.mpg_vcd.zip --output s5.mpg_vcd.zip && unzip s5.mpg_vcd.zip\n",
187      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s6/video/s6.mpg_vcd.zip --output s6.mpg_vcd.zip && unzip s6.mpg_vcd.zip\n",
188      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s7/video/s7.mpg_vcd.zip --output s7.mpg_vcd.zip && unzip s7.mpg_vcd.zip\n",
189      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s8/video/s8.mpg_vcd.zip --output s8.mpg_vcd.zip && unzip s8.mpg_vcd.zip\n",
190      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s9/video/s9.mpg_vcd.zip --output s9.mpg_vcd.zip && unzip s9.mpg_vcd.zip\n",
191      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s10/video/s10.mpg_vcd.zip --output s10.mpg_vcd.zip && unzip s10.mpg_vcd.zip\n",
192      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s11/video/s11.mpg_vcd.zip --output s11.mpg_vcd.zip && unzip s11.mpg_vcd.zip\n",
193      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s12/video/s12.mpg_vcd.zip --output s12.mpg_vcd.zip && unzip s12.mpg_vcd.zip\n",
194      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s13/video/s13.mpg_vcd.zip --output s13.mpg_vcd.zip && unzip s13.mpg_vcd.zip\n",
195      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s14/video/s14.mpg_vcd.zip --output s14.mpg_vcd.zip && unzip s14.mpg_vcd.zip\n",
196      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s15/video/s15.mpg_vcd.zip --output s15.mpg_vcd.zip && unzip s15.mpg_vcd.zip\n",
197      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s16/video/s16.mpg_vcd.zip --output s16.mpg_vcd.zip && unzip s16.mpg_vcd.zip\n",
198      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s17/video/s17.mpg_vcd.zip --output s17.mpg_vcd.zip && unzip s17.mpg_vcd.zip\n",
199      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s18/video/s18.mpg_vcd.zip --output s18.mpg_vcd.zip && unzip s18.mpg_vcd.zip\n",
200      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s19/video/s19.mpg_vcd.zip --output s19.mpg_vcd.zip && unzip s19.mpg_vcd.zip\n",
201      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s20/video/s20.mpg_vcd.zip --output s20.mpg_vcd.zip && unzip s20.mpg_vcd.zip\n",
202      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s21/video/s21.mpg_vcd.zip --output s21.mpg_vcd.zip && unzip s21.mpg_vcd.zip\n",
203      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s22/video/s22.mpg_vcd.zip --output s22.mpg_vcd.zip && unzip s22.mpg_vcd.zip\n",
204      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s23/video/s23.mpg_vcd.zip --output s23.mpg_vcd.zip && unzip s23.mpg_vcd.zip\n",
205      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s24/video/s24.mpg_vcd.zip --output s24.mpg_vcd.zip && unzip s24.mpg_vcd.zip\n",
206      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s25/video/s25.mpg_vcd.zip --output s25.mpg_vcd.zip && unzip s25.mpg_vcd.zip\n",
207      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s26/video/s26.mpg_vcd.zip --output s26.mpg_vcd.zip && unzip s26.mpg_vcd.zip\n",
208      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s27/video/s27.mpg_vcd.zip --output s27.mpg_vcd.zip && unzip s27.mpg_vcd.zip\n",
209      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s28/video/s28.mpg_vcd.zip --output s28.mpg_vcd.zip && unzip s28.mpg_vcd.zip\n",
210      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s29/video/s29.mpg_vcd.zip --output s29.mpg_vcd.zip && unzip s29.mpg_vcd.zip\n",
211      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s30/video/s30.mpg_vcd.zip --output s30.mpg_vcd.zip && unzip s30.mpg_vcd.zip\n",
212      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s31/video/s31.mpg_vcd.zip --output s31.mpg_vcd.zip && unzip s31.mpg_vcd.zip\n",
213      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s32/video/s32.mpg_vcd.zip --output s32.mpg_vcd.zip && unzip s32.mpg_vcd.zip\n",
214      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s33/video/s33.mpg_vcd.zip --output s33.mpg_vcd.zip && unzip s33.mpg_vcd.zip\n",
215      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s34/video/s34.mpg_vcd.zip --output s34.mpg_vcd.zip && unzip s34.mpg_vcd.zip\n"
216     ]
217    }
218   ],
219   "source": [
220    "src_path = '../data/mp4s'\n",
221    "res = download_mp4(0, tot_movies, {'src_path':src_path})"
222   ]
223  },
224  {
225   "cell_type": "code",
226   "execution_count": 9,
227   "metadata": {},
228   "outputs": [
229    {
230     "name": "stdout",
231     "output_type": "stream",
232     "text": [
233      "({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34}, set())\n"
234     ]
235    },
236    {
237     "data": {
238      "text/plain": [
239       "0"
240      ]
241     },
242     "execution_count": 9,
243     "metadata": {},
244     "output_type": "execute_result"
245    }
246   ],
247   "source": [
248    "print (res)\n",
249    "os.system('rm -f {src_path}/*.zip && rm -f {src_path}/*/Thumbs.db'.format(src_path=src_path))"
250   ]
251  },
252  {
253   "cell_type": "code",
254   "execution_count": null,
255   "metadata": {},
256   "outputs": [],
257   "source": []
258  },
259  {
260   "cell_type": "markdown",
261   "metadata": {},
262   "source": [
263    "## Preprocess Data"
264   ]
265  },
266  {
267   "cell_type": "code",
268   "execution_count": 10,
269   "metadata": {},
270   "outputs": [],
271   "source": [
272    "from preprocess_data import preprocess, find_files, Video"
273   ]
274  },
275  {
276   "cell_type": "code",
277   "execution_count": 11,
278   "metadata": {},
279   "outputs": [],
280   "source": [
281    "tgt_path = '../data/datasets'"
282   ]
283  },
284  {
285   "cell_type": "code",
286   "execution_count": 12,
287   "metadata": {},
288   "outputs": [
289    {
290     "data": {
291      "text/plain": [
292       "0"
293      ]
294     },
295     "execution_count": 12,
296     "metadata": {},
297     "output_type": "execute_result"
298    }
299   ],
300   "source": [
301    "os.makedirs('{tgt_path}'.format(tgt_path=tgt_path), exist_ok=True)\n",
302    "os.system('rm -rf {tgt_path}'.format(tgt_path=tgt_path))"
303   ]
304  },
305  {
306   "cell_type": "code",
307   "execution_count": 13,
308   "metadata": {},
309   "outputs": [],
310   "source": [
311    "res = preprocess(0, tot_movies, {'src_path':src_path, 'tgt_path':tgt_path})"
312   ]
313  },
314  {
315   "cell_type": "code",
316   "execution_count": 14,
317   "metadata": {},
318   "outputs": [
319    {
320     "name": "stdout",
321     "output_type": "stream",
322     "text": [
323      "({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34}, set())\n"
324     ]
325    }
326   ],
327   "source": [
328    "print (res)"
329   ]
330  },
331  {
332   "cell_type": "code",
333   "execution_count": null,
334   "metadata": {},
335   "outputs": [],
336   "source": []
337  }
338 ],
339 "metadata": {
340  "kernelspec": {
341   "display_name": "Python [default]",
342   "language": "python",
343   "name": "python3"
344  },
345  "language_info": {
346   "codemirror_mode": {
347    "name": "ipython",
348    "version": 3
349   },
350   "file_extension": ".py",
351   "mimetype": "text/x-python",
352   "name": "python",
353   "nbconvert_exporter": "python",
354   "pygments_lexer": "ipython3",
355   "version": "3.6.4"
356  }
357 },
358 "nbformat": 4,
359 "nbformat_minor": 2
360}
361