1 /* libs/pixelflinger/codeflinger/GGLAssembler.cpp
2 **
3 ** Copyright 2006, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 **     http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17 
18 #define LOG_TAG "GGLAssembler"
19 
20 #include <assert.h>
21 #include <stdint.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <sys/types.h>
25 
26 #include <log/log.h>
27 
28 #include "GGLAssembler.h"
29 
30 namespace android {
31 
32 // ----------------------------------------------------------------------------
33 
GGLAssembler(ARMAssemblerInterface * target)34 GGLAssembler::GGLAssembler(ARMAssemblerInterface* target)
35     : ARMAssemblerProxy(target),
36       RegisterAllocator(ARMAssemblerProxy::getCodegenArch()), mOptLevel(7)
37 {
38 }
39 
~GGLAssembler()40 GGLAssembler::~GGLAssembler()
41 {
42 }
43 
prolog()44 void GGLAssembler::prolog()
45 {
46     ARMAssemblerProxy::prolog();
47 }
48 
epilog(uint32_t touched)49 void GGLAssembler::epilog(uint32_t touched)
50 {
51     ARMAssemblerProxy::epilog(touched);
52 }
53 
reset(int opt_level)54 void GGLAssembler::reset(int opt_level)
55 {
56     ARMAssemblerProxy::reset();
57     RegisterAllocator::reset();
58     mOptLevel = opt_level;
59 }
60 
61 // ---------------------------------------------------------------------------
62 
scanline(const needs_t & needs,context_t const * c)63 int GGLAssembler::scanline(const needs_t& needs, context_t const* c)
64 {
65     int err = 0;
66     int opt_level = mOptLevel;
67     while (opt_level >= 0) {
68         reset(opt_level);
69         err = scanline_core(needs, c);
70         if (err == 0)
71             break;
72         opt_level--;
73     }
74 
75     // XXX: in theory, pcForLabel is not valid before generate()
76     uint32_t* fragment_start_pc = pcForLabel("fragment_loop");
77     uint32_t* fragment_end_pc = pcForLabel("epilog");
78     const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc);
79 
80     // build a name for our pipeline
81     char name[64];
82     sprintf(name,
83             "scanline__%08X:%08X_%08X_%08X [%3d ipp]",
84             needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops);
85 
86     if (err) {
87         ALOGE("Error while generating ""%s""\n", name);
88         disassemble(name);
89         return -1;
90     }
91 
92     return generate(name);
93 }
94 
scanline_core(const needs_t & needs,context_t const * c)95 int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
96 {
97     mBlendFactorCached = 0;
98     mBlending = 0;
99     mMasking = 0;
100     mAA        = GGL_READ_NEEDS(P_AA, needs.p);
101     mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
102     mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
103     mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
104     mFog       = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
105     mSmooth    = GGL_READ_NEEDS(SHADE, needs.n) != 0;
106     mBuilderContext.needs = needs;
107     mBuilderContext.c = c;
108     mBuilderContext.Rctx = reserveReg(R0); // context always in R0
109     mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];
110 
111     // ------------------------------------------------------------------------
112 
113     decodeLogicOpNeeds(needs);
114 
115     decodeTMUNeeds(needs, c);
116 
117     mBlendSrc  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
118     mBlendDst  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
119     mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
120     mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));
121 
122     if (!mCbFormat.c[GGLFormat::ALPHA].h) {
123         if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
124             (mBlendSrc == GGL_DST_ALPHA)) {
125             mBlendSrc = GGL_ONE;
126         }
127         if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
128             (mBlendSrcA == GGL_DST_ALPHA)) {
129             mBlendSrcA = GGL_ONE;
130         }
131         if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
132             (mBlendDst == GGL_DST_ALPHA)) {
133             mBlendDst = GGL_ONE;
134         }
135         if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
136             (mBlendDstA == GGL_DST_ALPHA)) {
137             mBlendDstA = GGL_ONE;
138         }
139     }
140 
141     // if we need the framebuffer, read it now
142     const int blending =    blending_codes(mBlendSrc, mBlendDst) |
143                             blending_codes(mBlendSrcA, mBlendDstA);
144 
145     // XXX: handle special cases, destination not modified...
146     if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
147         (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
148         // Destination unmodified (beware of logic ops)
149     } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
150         (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
151         // Destination is zero (beware of logic ops)
152     }
153 
154     int fbComponents = 0;
155     const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
156     for (int i=0 ; i<4 ; i++) {
157         const int mask = 1<<i;
158         component_info_t& info = mInfo[i];
159         int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
160         int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
161         if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
162             fs = GGL_ONE;
163         info.masked =   !!(masking & mask);
164         info.inDest =   !info.masked && mCbFormat.c[i].h &&
165                         ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
166         if (mCbFormat.components >= GGL_LUMINANCE &&
167                 (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
168             info.inDest = false;
169         }
170         info.needed =   (i==GGLFormat::ALPHA) &&
171                         (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
172         info.replaced = !!(mTextureMachine.replaced & mask);
173         info.iterated = (!info.replaced && (info.inDest || info.needed));
174         info.smooth =   mSmooth && info.iterated;
175         info.fog =      mFog && info.inDest && (i != GGLFormat::ALPHA);
176         info.blend =    (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));
177 
178         mBlending |= (info.blend ? mask : 0);
179         mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
180         fbComponents |= mCbFormat.c[i].h ? mask : 0;
181     }
182 
183     mAllMasked = (mMasking == fbComponents);
184     if (mAllMasked) {
185         mDithering = 0;
186     }
187 
188     fragment_parts_t parts;
189 
190     // ------------------------------------------------------------------------
191     prolog();
192     // ------------------------------------------------------------------------
193 
194     build_scanline_prolog(parts, needs);
195 
196     if (registerFile().status())
197         return registerFile().status();
198 
199     // ------------------------------------------------------------------------
200     label("fragment_loop");
201     // ------------------------------------------------------------------------
202     {
203         Scratch regs(registerFile());
204 
205         if (mDithering) {
206             // update the dither index.
207             MOV(AL, 0, parts.count.reg,
208                     reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT));
209             ADD(AL, 0, parts.count.reg, parts.count.reg,
210                     imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT)));
211             MOV(AL, 0, parts.count.reg,
212                     reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT));
213         }
214 
215         // XXX: could we do an early alpha-test here in some cases?
216         // It would probaly be used only with smooth-alpha and no texture
217         // (or no alpha component in the texture).
218 
219         // Early z-test
220         if (mAlphaTest==GGL_ALWAYS) {
221             build_depth_test(parts, Z_TEST|Z_WRITE);
222         } else {
223             // we cannot do the z-write here, because
224             // it might be killed by the alpha-test later
225             build_depth_test(parts, Z_TEST);
226         }
227 
228         { // texture coordinates
229             Scratch scratches(registerFile());
230 
231             // texel generation
232             build_textures(parts, regs);
233             if (registerFile().status())
234                 return registerFile().status();
235         }
236 
237         if ((blending & (FACTOR_DST|BLEND_DST)) ||
238                 (mMasking && !mAllMasked) ||
239                 (mLogicOp & LOGIC_OP_DST))
240         {
241             // blending / logic_op / masking need the framebuffer
242             mDstPixel.setTo(regs.obtain(), &mCbFormat);
243 
244             // load the framebuffer pixel
245             comment("fetch color-buffer");
246             load(parts.cbPtr, mDstPixel);
247         }
248 
249         if (registerFile().status())
250             return registerFile().status();
251 
252         pixel_t pixel;
253         int directTex = mTextureMachine.directTexture;
254         if (directTex | parts.packed) {
255             // note: we can't have both here
256             // iterated color or direct texture
257             pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
258             pixel.flags &= ~CORRUPTIBLE;
259         } else {
260             if (mDithering) {
261                 const int ctxtReg = mBuilderContext.Rctx;
262                 const int mask = GGL_DITHER_SIZE-1;
263                 parts.dither = reg_t(regs.obtain());
264                 AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
265                 ADDR_ADD(AL, 0, parts.dither.reg, ctxtReg, parts.dither.reg);
266                 LDRB(AL, parts.dither.reg, parts.dither.reg,
267                         immed12_pre(GGL_OFFSETOF(ditherMatrix)));
268             }
269 
270             // allocate a register for the resulting pixel
271             pixel.setTo(regs.obtain(), &mCbFormat, FIRST);
272 
273             build_component(pixel, parts, GGLFormat::ALPHA,    regs);
274 
275             if (mAlphaTest!=GGL_ALWAYS) {
276                 // only handle the z-write part here. We know z-test
277                 // was successful, as well as alpha-test.
278                 build_depth_test(parts, Z_WRITE);
279             }
280 
281             build_component(pixel, parts, GGLFormat::RED,      regs);
282             build_component(pixel, parts, GGLFormat::GREEN,    regs);
283             build_component(pixel, parts, GGLFormat::BLUE,     regs);
284 
285             pixel.flags |= CORRUPTIBLE;
286         }
287 
288         if (registerFile().status())
289             return registerFile().status();
290 
291         if (pixel.reg == -1) {
292             // be defensive here. if we're here it's probably
293             // that this whole fragment is a no-op.
294             pixel = mDstPixel;
295         }
296 
297         if (!mAllMasked) {
298             // logic operation
299             build_logic_op(pixel, regs);
300 
301             // masking
302             build_masking(pixel, regs);
303 
304             comment("store");
305             store(parts.cbPtr, pixel, WRITE_BACK);
306         }
307     }
308 
309     if (registerFile().status())
310         return registerFile().status();
311 
312     // update the iterated color...
313     if (parts.reload != 3) {
314         build_smooth_shade(parts);
315     }
316 
317     // update iterated z
318     build_iterate_z(parts);
319 
320     // update iterated fog
321     build_iterate_f(parts);
322 
323     SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
324     B(PL, "fragment_loop");
325     label("epilog");
326     epilog(registerFile().touched());
327 
328     if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
329         if (mDepthTest!=GGL_ALWAYS) {
330             label("discard_before_textures");
331             build_iterate_texture_coordinates(parts);
332         }
333         label("discard_after_textures");
334         build_smooth_shade(parts);
335         build_iterate_z(parts);
336         build_iterate_f(parts);
337         if (!mAllMasked) {
338             ADDR_ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
339         }
340         SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
341         B(PL, "fragment_loop");
342         epilog(registerFile().touched());
343     }
344 
345     return registerFile().status();
346 }
347 
348 // ---------------------------------------------------------------------------
349 
build_scanline_prolog(fragment_parts_t & parts,const needs_t & needs)350 void GGLAssembler::build_scanline_prolog(
351     fragment_parts_t& parts, const needs_t& needs)
352 {
353     Scratch scratches(registerFile());
354 
355     // compute count
356     comment("compute ct (# of pixels to process)");
357     parts.count.setTo(obtainReg());
358     int Rx = scratches.obtain();
359     int Ry = scratches.obtain();
360     CONTEXT_LOAD(Rx, iterators.xl);
361     CONTEXT_LOAD(parts.count.reg, iterators.xr);
362     CONTEXT_LOAD(Ry, iterators.y);
363 
364     // parts.count = iterators.xr - Rx
365     SUB(AL, 0, parts.count.reg, parts.count.reg, Rx);
366     SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1));
367 
368     if (mDithering) {
369         // parts.count.reg = 0xNNNNXXDD
370         // NNNN = count-1
371         // DD   = dither offset
372         // XX   = 0xxxxxxx (x = garbage)
373         Scratch scratches(registerFile());
374         int tx = scratches.obtain();
375         int ty = scratches.obtain();
376         AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK));
377         AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK));
378         ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT));
379         ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16));
380     } else {
381         // parts.count.reg = 0xNNNN0000
382         // NNNN = count-1
383         MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16));
384     }
385 
386     if (!mAllMasked) {
387         // compute dst ptr
388         comment("compute color-buffer pointer");
389         const int cb_bits = mCbFormat.size*8;
390         int Rs = scratches.obtain();
391         parts.cbPtr.setTo(obtainReg(), cb_bits);
392         CONTEXT_LOAD(Rs, state.buffers.color.stride);
393         CONTEXT_ADDR_LOAD(parts.cbPtr.reg, state.buffers.color.data);
394         SMLABB(AL, Rs, Ry, Rs, Rx);  // Rs = Rx + Ry*Rs
395         base_offset(parts.cbPtr, parts.cbPtr, Rs);
396         scratches.recycle(Rs);
397     }
398 
399     // init fog
400     const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
401     if (need_fog) {
402         comment("compute initial fog coordinate");
403         Scratch scratches(registerFile());
404         int dfdx = scratches.obtain();
405         int ydfdy = scratches.obtain();
406         int f = ydfdy;
407         CONTEXT_LOAD(dfdx,  generated_vars.dfdx);
408         CONTEXT_LOAD(ydfdy, iterators.ydfdy);
409         MLA(AL, 0, f, Rx, dfdx, ydfdy);
410         CONTEXT_STORE(f, generated_vars.f);
411     }
412 
413     // init Z coordinate
414     if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
415         parts.z = reg_t(obtainReg());
416         comment("compute initial Z coordinate");
417         Scratch scratches(registerFile());
418         int dzdx = scratches.obtain();
419         int ydzdy = parts.z.reg;
420         CONTEXT_LOAD(dzdx,  generated_vars.dzdx);   // 1.31 fixed-point
421         CONTEXT_LOAD(ydzdy, iterators.ydzdy);       // 1.31 fixed-point
422         MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy);
423 
424         // we're going to index zbase of parts.count
425         // zbase = base + (xl-count + stride*y)*2
426         int Rs = dzdx;
427         int zbase = scratches.obtain();
428         CONTEXT_LOAD(Rs, state.buffers.depth.stride);
429         CONTEXT_ADDR_LOAD(zbase, state.buffers.depth.data);
430         SMLABB(AL, Rs, Ry, Rs, Rx);
431         ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
432         ADDR_ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
433         CONTEXT_ADDR_STORE(zbase, generated_vars.zbase);
434     }
435 
436     // init texture coordinates
437     init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
438     scratches.recycle(Ry);
439 
440     // iterated color
441     init_iterated_color(parts, reg_t(Rx));
442 
443     // init coverage factor application (anti-aliasing)
444     if (mAA) {
445         parts.covPtr.setTo(obtainReg(), 16);
446         CONTEXT_ADDR_LOAD(parts.covPtr.reg, state.buffers.coverage);
447         ADDR_ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
448     }
449 }
450 
451 // ---------------------------------------------------------------------------
452 
build_component(pixel_t & pixel,const fragment_parts_t & parts,int component,Scratch & regs)453 void GGLAssembler::build_component( pixel_t& pixel,
454                                     const fragment_parts_t& parts,
455                                     int component,
456                                     Scratch& regs)
457 {
458     static char const * comments[] = {"alpha", "red", "green", "blue"};
459     comment(comments[component]);
460 
461     // local register file
462     Scratch scratches(registerFile());
463     const int dst_component_size = pixel.component_size(component);
464 
465     component_t temp(-1);
466     build_incoming_component( temp, dst_component_size,
467             parts, component, scratches, regs);
468 
469     if (mInfo[component].inDest) {
470 
471         // blending...
472         build_blending( temp, mDstPixel, component, scratches );
473 
474         // downshift component and rebuild pixel...
475         downshift(pixel, component, temp, parts.dither);
476     }
477 }
478 
build_incoming_component(component_t & temp,int dst_size,const fragment_parts_t & parts,int component,Scratch & scratches,Scratch & global_regs)479 void GGLAssembler::build_incoming_component(
480                                     component_t& temp,
481                                     int dst_size,
482                                     const fragment_parts_t& parts,
483                                     int component,
484                                     Scratch& scratches,
485                                     Scratch& global_regs)
486 {
487     const uint32_t component_mask = 1<<component;
488 
489     // Figure out what we need for the blending stage...
490     int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
491     int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
492     if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
493         fs = GGL_ONE;
494     }
495 
496     // Figure out what we need to extract and for what reason
497     const int blending = blending_codes(fs, fd);
498 
499     // Are we actually going to blend?
500     const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));
501 
502     // expand the source if the destination has more bits
503     int need_expander = false;
504     for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
505         texture_unit_t& tmu = mTextureMachine.tmu[i];
506         if ((tmu.format_idx) &&
507             (parts.texel[i].component_size(component) < dst_size)) {
508             need_expander = true;
509         }
510     }
511 
512     // do we need to extract this component?
513     const bool multiTexture = mTextureMachine.activeUnits > 1;
514     const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
515                                         (isAlphaSourceNeeded());
516     int need_extract = mInfo[component].needed;
517     if (mInfo[component].inDest)
518     {
519         need_extract |= ((need_blending ?
520                 (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
521         need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
522         need_extract |= mInfo[component].smooth;
523         need_extract |= mInfo[component].fog;
524         need_extract |= mDithering;
525         need_extract |= multiTexture;
526     }
527 
528     if (need_extract) {
529         Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;
530         component_t fragment;
531 
532         // iterated color
533         build_iterated_color(fragment, parts, component, regs);
534 
535         // texture environement (decal, modulate, replace)
536         build_texture_environment(fragment, parts, component, regs);
537 
538         // expand the source if the destination has more bits
539         if (need_expander && (fragment.size() < dst_size)) {
540             // we're here only if we fetched a texel
541             // (so we know for sure fragment is CORRUPTIBLE)
542             expand(fragment, fragment, dst_size);
543         }
544 
545         // We have a few specific things to do for the alpha-channel
546         if ((component==GGLFormat::ALPHA) &&
547             (mInfo[component].needed || fragment.size()<dst_size))
548         {
549             // convert to integer_t first and make sure
550             // we don't corrupt a needed register
551             if (fragment.l) {
552                 component_t incoming(fragment);
553                 modify(fragment, regs);
554                 MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l));
555                 fragment.h -= fragment.l;
556                 fragment.l = 0;
557             }
558 
559             // coverage factor application
560             build_coverage_application(fragment, parts, regs);
561 
562             // alpha-test
563             build_alpha_test(fragment, parts);
564 
565             if (blend_needs_alpha_source) {
566                 // We keep only 8 bits for the blending stage
567                 const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
568                 if (fragment.flags & CORRUPTIBLE) {
569                     fragment.flags &= ~CORRUPTIBLE;
570                     mAlphaSource.setTo(fragment.reg,
571                             fragment.size(), fragment.flags);
572                     if (shift) {
573                         MOV(AL, 0, mAlphaSource.reg,
574                             reg_imm(mAlphaSource.reg, LSR, shift));
575                     }
576                 } else {
577                     // XXX: it would better to do this in build_blend_factor()
578                     // so we can avoid the extra MOV below.
579                     mAlphaSource.setTo(regs.obtain(),
580                             fragment.size(), CORRUPTIBLE);
581                     if (shift) {
582                         MOV(AL, 0, mAlphaSource.reg,
583                             reg_imm(fragment.reg, LSR, shift));
584                     } else {
585                         MOV(AL, 0, mAlphaSource.reg, fragment.reg);
586                     }
587                 }
588                 mAlphaSource.s -= shift;
589             }
590         }
591 
592         // fog...
593         build_fog( fragment, component, regs );
594 
595         temp = fragment;
596     } else {
597         if (mInfo[component].inDest) {
598             // extraction not needed and replace
599             // we just select the right component
600             if ((mTextureMachine.replaced & component_mask) == 0) {
601                 // component wasn't replaced, so use it!
602                 temp = component_t(parts.iterated, component);
603             }
604             for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
605                 const texture_unit_t& tmu = mTextureMachine.tmu[i];
606                 if ((tmu.mask & component_mask) &&
607                     ((tmu.replaced & component_mask) == 0)) {
608                     temp = component_t(parts.texel[i], component);
609                 }
610             }
611         }
612     }
613 }
614 
isAlphaSourceNeeded() const615 bool GGLAssembler::isAlphaSourceNeeded() const
616 {
617     // XXX: also needed for alpha-test
618     const int bs = mBlendSrc;
619     const int bd = mBlendDst;
620     return  bs==GGL_SRC_ALPHA_SATURATE ||
621             bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA ||
622             bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ;
623 }
624 
625 // ---------------------------------------------------------------------------
626 
build_smooth_shade(const fragment_parts_t & parts)627 void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts)
628 {
629     if (mSmooth && !parts.iterated_packed) {
630         // update the iterated color in a pipelined way...
631         comment("update iterated color");
632         Scratch scratches(registerFile());
633 
634         const int reload = parts.reload;
635         for (int i=0 ; i<4 ; i++) {
636             if (!mInfo[i].iterated)
637                 continue;
638 
639             int c = parts.argb[i].reg;
640             int dx = parts.argb_dx[i].reg;
641 
642             if (reload & 1) {
643                 c = scratches.obtain();
644                 CONTEXT_LOAD(c, generated_vars.argb[i].c);
645             }
646             if (reload & 2) {
647                 dx = scratches.obtain();
648                 CONTEXT_LOAD(dx, generated_vars.argb[i].dx);
649             }
650 
651             if (mSmooth) {
652                 ADD(AL, 0, c, c, dx);
653             }
654 
655             if (reload & 1) {
656                 CONTEXT_STORE(c, generated_vars.argb[i].c);
657                 scratches.recycle(c);
658             }
659             if (reload & 2) {
660                 scratches.recycle(dx);
661             }
662         }
663     }
664 }
665 
666 // ---------------------------------------------------------------------------
667 
build_coverage_application(component_t & fragment,const fragment_parts_t & parts,Scratch & regs)668 void GGLAssembler::build_coverage_application(component_t& fragment,
669         const fragment_parts_t& parts, Scratch& regs)
670 {
671     // here fragment.l is guarenteed to be 0
672     if (mAA) {
673         // coverages are 1.15 fixed-point numbers
674         comment("coverage application");
675 
676         component_t incoming(fragment);
677         modify(fragment, regs);
678 
679         Scratch scratches(registerFile());
680         int cf = scratches.obtain();
681         LDRH(AL, cf, parts.covPtr.reg, immed8_post(2));
682         if (fragment.h > 31) {
683             fragment.h--;
684             SMULWB(AL, fragment.reg, incoming.reg, cf);
685         } else {
686             MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1));
687             SMULWB(AL, fragment.reg, fragment.reg, cf);
688         }
689     }
690 }
691 
692 // ---------------------------------------------------------------------------
693 
build_alpha_test(component_t & fragment,const fragment_parts_t &)694 void GGLAssembler::build_alpha_test(component_t& fragment,
695                                     const fragment_parts_t& /*parts*/)
696 {
697     if (mAlphaTest != GGL_ALWAYS) {
698         comment("Alpha Test");
699         Scratch scratches(registerFile());
700         int ref = scratches.obtain();
701         const int shift = GGL_COLOR_BITS-fragment.size();
702         CONTEXT_LOAD(ref, state.alpha_test.ref);
703         if (shift) CMP(AL, fragment.reg, reg_imm(ref, LSR, shift));
704         else       CMP(AL, fragment.reg, ref);
705         int cc = NV;
706         switch (mAlphaTest) {
707         case GGL_NEVER:     cc = NV;    break;
708         case GGL_LESS:      cc = LT;    break;
709         case GGL_EQUAL:     cc = EQ;    break;
710         case GGL_LEQUAL:    cc = LS;    break;
711         case GGL_GREATER:   cc = HI;    break;
712         case GGL_NOTEQUAL:  cc = NE;    break;
713         case GGL_GEQUAL:    cc = HS;    break;
714         }
715         B(cc^1, "discard_after_textures");
716     }
717 }
718 
719 // ---------------------------------------------------------------------------
720 
build_depth_test(const fragment_parts_t & parts,uint32_t mask)721 void GGLAssembler::build_depth_test(
722         const fragment_parts_t& parts, uint32_t mask)
723 {
724     mask &= Z_TEST|Z_WRITE;
725     const needs_t& needs = mBuilderContext.needs;
726     const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p);
727     Scratch scratches(registerFile());
728 
729     if (mDepthTest != GGL_ALWAYS || zmask) {
730         int cc=AL, ic=AL;
731         switch (mDepthTest) {
732         case GGL_LESS:      ic = HI;    break;
733         case GGL_EQUAL:     ic = EQ;    break;
734         case GGL_LEQUAL:    ic = HS;    break;
735         case GGL_GREATER:   ic = LT;    break;
736         case GGL_NOTEQUAL:  ic = NE;    break;
737         case GGL_GEQUAL:    ic = LS;    break;
738         case GGL_NEVER:
739             // this never happens, because it's taken care of when
740             // computing the needs. but we keep it for completness.
741             comment("Depth Test (NEVER)");
742             B(AL, "discard_before_textures");
743             return;
744         case GGL_ALWAYS:
745             // we're here because zmask is enabled
746             mask &= ~Z_TEST;    // test always passes.
747             break;
748         }
749 
750         // inverse the condition
751         cc = ic^1;
752 
753         if ((mask & Z_WRITE) && !zmask) {
754             mask &= ~Z_WRITE;
755         }
756 
757         if (!mask)
758             return;
759 
760         comment("Depth Test");
761 
762         int zbase = scratches.obtain();
763         int depth = scratches.obtain();
764         int z = parts.z.reg;
765 
766         CONTEXT_ADDR_LOAD(zbase, generated_vars.zbase);  // stall
767         ADDR_SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
768             // above does zbase = zbase + ((count >> 16) << 1)
769 
770         if (mask & Z_TEST) {
771             LDRH(AL, depth, zbase);  // stall
772             CMP(AL, depth, reg_imm(z, LSR, 16));
773             B(cc, "discard_before_textures");
774         }
775         if (mask & Z_WRITE) {
776             if (mask == Z_WRITE) {
777                 // only z-write asked, cc is meaningless
778                 ic = AL;
779             }
780             MOV(AL, 0, depth, reg_imm(z, LSR, 16));
781             STRH(ic, depth, zbase);
782         }
783     }
784 }
785 
build_iterate_z(const fragment_parts_t & parts)786 void GGLAssembler::build_iterate_z(const fragment_parts_t& parts)
787 {
788     const needs_t& needs = mBuilderContext.needs;
789     if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
790         Scratch scratches(registerFile());
791         int dzdx = scratches.obtain();
792         CONTEXT_LOAD(dzdx, generated_vars.dzdx);    // stall
793         ADD(AL, 0, parts.z.reg, parts.z.reg, dzdx);
794     }
795 }
796 
build_iterate_f(const fragment_parts_t &)797 void GGLAssembler::build_iterate_f(const fragment_parts_t& /*parts*/)
798 {
799     const needs_t& needs = mBuilderContext.needs;
800     if (GGL_READ_NEEDS(P_FOG, needs.p)) {
801         Scratch scratches(registerFile());
802         int dfdx = scratches.obtain();
803         int f = scratches.obtain();
804         CONTEXT_LOAD(f,     generated_vars.f);
805         CONTEXT_LOAD(dfdx,  generated_vars.dfdx);   // stall
806         ADD(AL, 0, f, f, dfdx);
807         CONTEXT_STORE(f,    generated_vars.f);
808     }
809 }
810 
811 // ---------------------------------------------------------------------------
812 
// Emits the instructions implementing the logic op selected in
// mBuilderContext.needs, combining the source pixel `pixel` (s) with the
// destination pixel mDstPixel (d). The result is left in pixel.reg.
//
//   pixel - in: source pixel; out: result. If it is not flagged CORRUPTIBLE,
//           a fresh register is obtained from `regs` for the result and the
//           flag is set, so the original source register is not overwritten.
//   regs  - scratch allocator used for that result register.
//
// GGL_COPY emits nothing (the source is already the result).
void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs)
{
    const needs_t& needs = mBuilderContext.needs;
    // The needs field is OR-ed with GGL_CLEAR so it can be compared against
    // the GGL_* logic-op enumerants below.
    const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
    if (opcode == GGL_COPY)
        return;

    comment("logic operation");

    // `s` keeps the original source register; `pixel` may be redirected to
    // a new destination register just below.
    pixel_t s(pixel);
    if (!(pixel.flags & CORRUPTIBLE)) {
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }

    pixel_t d(mDstPixel);
    // BIC(rd, a, b) computes a & ~b; MVN computes the bitwise complement.
    switch(opcode) {
    case GGL_CLEAR:         MOV(AL, 0, pixel.reg, imm(0));          break;  // 0
    case GGL_AND:           AND(AL, 0, pixel.reg, s.reg, d.reg);    break;  // s & d
    case GGL_AND_REVERSE:   BIC(AL, 0, pixel.reg, s.reg, d.reg);    break;  // s & ~d
    case GGL_COPY:                                                  break;  // handled by the early return
    case GGL_AND_INVERTED:  BIC(AL, 0, pixel.reg, d.reg, s.reg);    break;  // ~s & d
    case GGL_NOOP:          MOV(AL, 0, pixel.reg, d.reg);           break;  // d
    case GGL_XOR:           EOR(AL, 0, pixel.reg, s.reg, d.reg);    break;  // s ^ d
    case GGL_OR:            ORR(AL, 0, pixel.reg, s.reg, d.reg);    break;  // s | d
    case GGL_NOR:           ORR(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;  // ~(s | d)
    case GGL_EQUIV:         EOR(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;  // ~(s ^ d)
    case GGL_INVERT:        MVN(AL, 0, pixel.reg, d.reg);           break;  // ~d
    case GGL_OR_REVERSE:    // s | ~d == ~(~s & d)
                            BIC(AL, 0, pixel.reg, d.reg, s.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_COPY_INVERTED: MVN(AL, 0, pixel.reg, s.reg);           break;  // ~s
    case GGL_OR_INVERTED:   // ~s | d == ~(s & ~d)
                            BIC(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_NAND:          AND(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;  // ~(s & d)
    case GGL_SET:           MVN(AL, 0, pixel.reg, imm(0));          break;  // all ones
    };
}
855 
856 // ---------------------------------------------------------------------------
857 
// Returns the (even) bit position of the lowest 2-bit aligned pair of `val`
// that has at least one bit set, i.e. the smallest even i such that
// (val & (3 << i)) != 0.
//
// Returns 32 when `val` is zero, so the search always terminates and never
// shifts by 32 or more. The probe uses an unsigned literal so that testing
// the top pair (i == 30) does not overflow a signed int.
static uint32_t find_bottom(uint32_t val)
{
    uint32_t i = 0;
    while (i < 32 && !(val & (3u << i)))
        i += 2;
    return i;
}
865 
// Rotates `val` right, two bits at a time, until its two lowest bits are not
// both zero and its six highest bits are all zero. `rot` receives the total
// right-rotation applied (0, 2, ..., 30).
//
// If no such rotation exists (for instance val == 0), the search stops after
// a full 32-bit turn, which leaves `val` at its original value, and `rot` is
// reported as 0.
static void normalize(uint32_t& val, uint32_t& rot)
{
    rot = 0;
    for (;;) {
        const bool lowPairSet  = (val & 3) != 0;
        const bool highBitsSet = (val & 0xFC000000) != 0;
        if (lowPairSet && !highBitsSet) {
            return;
        }

        // rotate right by two bits
        val = (val >> 2) | ((val & 3) << 30);
        rot += 2;

        if (rot == 32) {
            // went all the way around without finding a normal form
            rot = 0;
            return;
        }
    }
}
881 
build_and_immediate(int d,int s,uint32_t mask,int bits)882 void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
883 {
884     uint32_t rot;
885     uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
886     mask &= size;
887 
888     if (mask == size) {
889         if (d != s)
890             MOV( AL, 0, d, s);
891         return;
892     }
893 
894     if ((getCodegenArch() == CODEGEN_ARCH_MIPS) ||
895         (getCodegenArch() == CODEGEN_ARCH_MIPS64)) {
896         // MIPS can do 16-bit imm in 1 instr, 32-bit in 3 instr
897         // the below ' while (mask)' code is buggy on mips
898         // since mips returns true on isValidImmediate()
899         // then we get multiple AND instr (positive logic)
900         AND( AL, 0, d, s, imm(mask) );
901         return;
902     }
903     else if (getCodegenArch() == CODEGEN_ARCH_ARM64) {
904         AND( AL, 0, d, s, imm(mask) );
905         return;
906     }
907 
908     int negative_logic = !isValidImmediate(mask);
909     if (negative_logic) {
910         mask = ~mask & size;
911     }
912     normalize(mask, rot);
913 
914     if (mask) {
915         while (mask) {
916             uint32_t bitpos = find_bottom(mask);
917             int shift = rot + bitpos;
918             uint32_t m = mask & (0xff << bitpos);
919             mask &= ~m;
920             m >>= bitpos;
921             int32_t newMask =  (m<<shift) | (m>>(32-shift));
922             if (!negative_logic) {
923                 AND( AL, 0, d, s, imm(newMask) );
924             } else {
925                 BIC( AL, 0, d, s, imm(newMask) );
926             }
927             s = d;
928         }
929     } else {
930         MOV( AL, 0, d, imm(0));
931     }
932 }
933 
build_masking(pixel_t & pixel,Scratch & regs)934 void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs)
935 {
936     if (!mMasking || mAllMasked) {
937         return;
938     }
939 
940     comment("color mask");
941 
942     pixel_t fb(mDstPixel);
943     pixel_t s(pixel);
944     if (!(pixel.flags & CORRUPTIBLE)) {
945         pixel.reg = regs.obtain();
946         pixel.flags |= CORRUPTIBLE;
947     }
948 
949     int mask = 0;
950     for (int i=0 ; i<4 ; i++) {
951         const int component_mask = 1<<i;
952         const int h = fb.format.c[i].h;
953         const int l = fb.format.c[i].l;
954         if (h && (!(mMasking & component_mask))) {
955             mask |= ((1<<(h-l))-1) << l;
956         }
957     }
958 
959     // There is no need to clear the masked components of the source
960     // (unless we applied a logic op), because they're already zeroed
961     // by construction (masked components are not computed)
962 
963     if (mLogicOp) {
964         const needs_t& needs = mBuilderContext.needs;
965         const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
966         if (opcode != GGL_CLEAR) {
967             // clear masked component of source
968             build_and_immediate(pixel.reg, s.reg, mask, fb.size());
969             s = pixel;
970         }
971     }
972 
973     // clear non masked components of destination
974     build_and_immediate(fb.reg, fb.reg, ~mask, fb.size());
975 
976     // or back the channels that were masked
977     if (s.reg == fb.reg) {
978          // this is in fact a MOV
979         if (s.reg == pixel.reg) {
980             // ugh. this in in fact a nop
981         } else {
982             MOV(AL, 0, pixel.reg, fb.reg);
983         }
984     } else {
985         ORR(AL, 0, pixel.reg, s.reg, fb.reg);
986     }
987 }
988 
989 // ---------------------------------------------------------------------------
990 
base_offset(const pointer_t & d,const pointer_t & b,const reg_t & o)991 void GGLAssembler::base_offset(
992         const pointer_t& d, const pointer_t& b, const reg_t& o)
993 {
994     switch (b.size) {
995     case 32:
996         ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
997         break;
998     case 24:
999         if (d.reg == b.reg) {
1000             ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
1001             ADDR_ADD(AL, 0, d.reg, d.reg, o.reg);
1002         } else {
1003             ADDR_ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
1004             ADDR_ADD(AL, 0, d.reg, d.reg, b.reg);
1005         }
1006         break;
1007     case 16:
1008         ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
1009         break;
1010     case 8:
1011         ADDR_ADD(AL, 0, d.reg, b.reg, o.reg);
1012         break;
1013     }
1014 }
1015 
1016 // ----------------------------------------------------------------------------
1017 // cheezy register allocator...
1018 // ----------------------------------------------------------------------------
1019 
// Modified to support MIPS processors, in a very simple way. We retain the
// (ARM) limit of 16 total registers, but shift the mapping of those registers
// from 0-15 to 2-17. (Register 0 on MIPS cannot be used as a GP register, and
// register 1 has a traditional use as a temp.)
1024 
// Constructs the allocator; `arch` is forwarded to the embedded RegisterFile
// as its code-generation architecture.
RegisterAllocator::RegisterAllocator(int arch) : mRegs(arch)
{
}
1028 
// Resets the embedded register file (see RegisterFile::reset()).
void RegisterAllocator::reset()
{
    mRegs.reset();
}
1033 
// Reserves the specific register `reg` in the register file and returns the
// value RegisterFile::reserve() returns for it.
int RegisterAllocator::reserveReg(int reg)
{
    return mRegs.reserve(reg);
}
1038 
// Allocates a free register from the register file and returns it
// (see RegisterFile::obtain() for the out-of-registers behavior).
int RegisterAllocator::obtainReg()
{
    return mRegs.obtain();
}
1043 
// Returns register `reg` to the register file so it can be allocated again.
void RegisterAllocator::recycleReg(int reg)
{
    mRegs.recycle(reg);
}
1048 
// Gives direct, mutable access to the underlying register file.
RegisterAllocator::RegisterFile& RegisterAllocator::registerFile()
{
    return mRegs;
}
1053 
1054 // ----------------------------------------------------------------------------
1055 
RegisterFile(int codegen_arch)1056 RegisterAllocator::RegisterFile::RegisterFile(int codegen_arch)
1057     : mRegs(0), mTouched(0), mStatus(0), mArch(codegen_arch), mRegisterOffset(0)
1058 {
1059     if ((mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) ||
1060         (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS64)) {
1061         mRegisterOffset = 2;    // ARM has regs 0..15, MIPS offset to 2..17
1062     }
1063     reserve(ARMAssemblerInterface::SP);
1064     reserve(ARMAssemblerInterface::PC);
1065 }
1066 
RegisterFile(const RegisterFile & rhs,int codegen_arch)1067 RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs, int codegen_arch)
1068     : mRegs(rhs.mRegs), mTouched(rhs.mTouched), mArch(codegen_arch), mRegisterOffset(0)
1069 {
1070     if ((mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) ||
1071         (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS64)) {
1072         mRegisterOffset = 2;    // ARM has regs 0..15, MIPS offset to 2..17
1073     }
1074 }
1075 
// Nothing to release: the destructor body is empty.
RegisterAllocator::RegisterFile::~RegisterFile()
{
}
1079 
// Two register files compare equal when their sets of currently allocated
// registers (mRegs) match. The touched set, status flags and architecture
// are not part of the comparison.
bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const
{
    return (mRegs == rhs.mRegs);
}
1084 
reset()1085 void RegisterAllocator::RegisterFile::reset()
1086 {
1087     mRegs = mTouched = mStatus = 0;
1088     reserve(ARMAssemblerInterface::SP);
1089     reserve(ARMAssemblerInterface::PC);
1090 }
1091 
// RegisterFile::reserve() takes a register parameter in the
// range 0-15 (ARM compatible), but on a MIPS processor it will
// return the actual allocated register in the range 2-17.
//
// Aborts (LOG_ALWAYS_FATAL_IF) if the register is already in use.
int RegisterAllocator::RegisterFile::reserve(int reg)
{
    // translate to the physical register index (no-op unless MIPS)
    reg += mRegisterOffset;
    LOG_ALWAYS_FATAL_IF(isUsed(reg),
                        "reserving register %d, but already in use",
                        reg);
    mRegs |= (1<<reg);
    // every allocated register counts as touched; mTouched is never
    // cleared by recycle(), only by reset()
    mTouched |= mRegs;
    return reg;
}
1105 
// Marks every register whose bit is set in regMask as allocated and touched.
// This interface uses regMask in range 2-17 on MIPS, no translation.
// Unlike reserve(), no check is made that the registers were free.
void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
{
    mRegs |= regMask;
    mTouched |= regMask;
}
1112 
// Tells whether register `reg` is currently allocated. `reg` is a physical
// index: mRegisterOffset is not applied here, callers add it themselves.
//
// Returns the register's bit from mRegs, i.e. non-zero (not necessarily 1)
// when in use and 0 when free. Aborts (LOG_ALWAYS_FATAL_IF) when `reg` is
// at or beyond 16 + mRegisterOffset.
int RegisterAllocator::RegisterFile::isUsed(int reg) const
{
    LOG_ALWAYS_FATAL_IF(reg>=16+(int)mRegisterOffset, "invalid register %d", reg);
    return mRegs & (1<<reg);
}
1118 
obtain()1119 int RegisterAllocator::RegisterFile::obtain()
1120 {
1121     const char priorityList[14] = {  0,  1, 2, 3,
1122                                     12, 14, 4, 5,
1123                                      6,  7, 8, 9,
1124                                     10, 11 };
1125     const int nbreg = sizeof(priorityList);
1126     int i, r, reg;
1127     for (i=0 ; i<nbreg ; i++) {
1128         r = priorityList[i];
1129         if (!isUsed(r + mRegisterOffset)) {
1130             break;
1131         }
1132     }
1133     // this is not an error anymore because, we'll try again with
1134     // a lower optimization level.
1135     //ALOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n");
1136     if (i >= nbreg) {
1137         mStatus |= OUT_OF_REGISTERS;
1138         // we return SP so we can more easily debug things
1139         // the code will never be run anyway.
1140         return ARMAssemblerInterface::SP;
1141     }
1142     reg = reserve(r);  // Param in Arm range 0-15, returns range 2-17 on Mips.
1143     return reg;
1144 }
1145 
hasFreeRegs() const1146 bool RegisterAllocator::RegisterFile::hasFreeRegs() const
1147 {
1148     uint32_t regs = mRegs >> mRegisterOffset;   // MIPS fix.
1149     return ((regs & 0xFFFF) == 0xFFFF) ? false : true;
1150 }
1151 
countFreeRegs() const1152 int RegisterAllocator::RegisterFile::countFreeRegs() const
1153 {
1154     uint32_t regs = mRegs >> mRegisterOffset;   // MIPS fix.
1155     int f = ~regs & 0xFFFF;
1156     // now count number of 1
1157    f = (f & 0x5555) + ((f>>1) & 0x5555);
1158    f = (f & 0x3333) + ((f>>2) & 0x3333);
1159    f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);
1160    f = (f & 0x00FF) + ((f>>8) & 0x00FF);
1161    return f;
1162 }
1163 
// Marks register `reg` as free by clearing its bit in mRegs. `reg` is used
// as a raw bit index (mRegisterOffset is not applied) and mTouched is left
// unchanged. Recycling a register that is not allocated is silently accepted.
void RegisterAllocator::RegisterFile::recycle(int reg)
{
    // commented out, since common failure of running out of regs
    // triggers this assertion. Since the code is not executed
    // in that case, it does not matter. No reason to FATAL err.
    // LOG_FATAL_IF(!isUsed(reg),
    //         "recycling unallocated register %d",
    //         reg);
    mRegs &= ~(1<<reg);
}
1174 
// Marks every register whose bit is set in regMask as free. The mask is
// applied to mRegs as-is (no offset translation) and mTouched is left
// unchanged. Bits for registers that are not allocated are silently accepted.
void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
{
    // commented out, since common failure of running out of regs
    // triggers this assertion. Since the code is not executed
    // in that case, it does not matter. No reason to FATAL err.
    // LOG_FATAL_IF((mRegs & regMask)!=regMask,
    //         "recycling unallocated registers "
    //         "(recycle=%08x, allocated=%08x, unallocated=%08x)",
    //         regMask, mRegs, mRegs&regMask);
    mRegs &= ~regMask;
}
1186 
// Returns the bitmask of every register that has been reserved since
// construction or the last reset(), including those recycled since.
uint32_t RegisterAllocator::RegisterFile::touched() const
{
    return mTouched;
}
1191 
1192 // ----------------------------------------------------------------------------
1193 
1194 }; // namespace android
1195 
1196