1## @file
2# Collect all defined strings in multiple uni files.
3#
4# Copyright (c) 2014, Intel Corporation. All rights reserved.<BR>
5#
6# This program and the accompanying materials are licensed and made available
7# under the terms and conditions of the BSD License which accompanies this
8# distribution. The full text of the license may be found at
9# http://opensource.org/licenses/bsd-license.php
10#
11# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
13#
14"""
15Collect all defined strings in multiple uni files
16"""
17
18##
19# Import Modules
20#
21import os, codecs, re
22import distutils.util
23from Logger import ToolError
24from Logger import Log as EdkLogger
25from Logger import StringTable as ST
26from Library.String import GetLineNo
27from Library.Misc import PathClass
28from Library.Misc import GetCharIndexOutStr
29from Library import DataType as DT
30
31##
32# Static definitions
33#
# Escape keywords as they are written inside .uni string text.
UNICODE_WIDE_CHAR = u'\\wide'
UNICODE_NARROW_CHAR = u'\\narrow'
UNICODE_NON_BREAKING_CHAR = u'\\nbr'
UNICODE_UNICODE_CR = '\r'
UNICODE_UNICODE_LF = '\n'

# Code points that PreProcess substitutes for the escape keywords above.
NARROW_CHAR = u'\uFFF0'
WIDE_CHAR = u'\uFFF1'
NON_BREAKING_CHAR = u'\uFFF2'
CR = u'\u000D'
LF = u'\u000A'
NULL = u'\u0000'
TAB = u'\t'
BACK_SPLASH = u'\\'

# Matches a complete "!include <name>" line and captures <name>.
# Written as a raw string so that \s and \S reach the regex engine untouched
# instead of relying on Python leaving unknown string escapes alone.
gINCLUDE_PATTERN = re.compile(r"^!include[\s]+([\S]+)[\s]*$", re.MULTILINE | re.UNICODE)
50
# Three-letter language code -> two-letter language code.
gLANG_CONV_TABLE = {
    'eng': 'en', 'fra': 'fr',
    'aar': 'aa', 'abk': 'ab', 'ave': 'ae', 'afr': 'af', 'aka': 'ak', 'amh': 'am',
    'arg': 'an', 'ara': 'ar', 'asm': 'as', 'ava': 'av', 'aym': 'ay', 'aze': 'az',
    'bak': 'ba', 'bel': 'be', 'bul': 'bg', 'bih': 'bh', 'bis': 'bi', 'bam': 'bm',
    'ben': 'bn', 'bod': 'bo', 'bre': 'br', 'bos': 'bs', 'cat': 'ca', 'che': 'ce',
    'cha': 'ch', 'cos': 'co', 'cre': 'cr', 'ces': 'cs', 'chu': 'cu', 'chv': 'cv',
    'cym': 'cy', 'dan': 'da', 'deu': 'de', 'div': 'dv', 'dzo': 'dz', 'ewe': 'ee',
    'ell': 'el', 'epo': 'eo', 'spa': 'es', 'est': 'et', 'eus': 'eu', 'fas': 'fa',
    'ful': 'ff', 'fin': 'fi', 'fij': 'fj', 'fao': 'fo', 'fry': 'fy', 'gle': 'ga',
    'gla': 'gd', 'glg': 'gl', 'grn': 'gn', 'guj': 'gu', 'glv': 'gv', 'hau': 'ha',
    'heb': 'he', 'hin': 'hi', 'hmo': 'ho', 'hrv': 'hr', 'hat': 'ht', 'hun': 'hu',
    'hye': 'hy', 'her': 'hz', 'ina': 'ia', 'ind': 'id', 'ile': 'ie', 'ibo': 'ig',
    'iii': 'ii', 'ipk': 'ik', 'ido': 'io', 'isl': 'is', 'ita': 'it', 'iku': 'iu',
    'jpn': 'ja', 'jav': 'jv', 'kat': 'ka', 'kon': 'kg', 'kik': 'ki', 'kua': 'kj',
    'kaz': 'kk', 'kal': 'kl', 'khm': 'km', 'kan': 'kn', 'kor': 'ko', 'kau': 'kr',
    'kas': 'ks', 'kur': 'ku', 'kom': 'kv', 'cor': 'kw', 'kir': 'ky', 'lat': 'la',
    'ltz': 'lb', 'lug': 'lg', 'lim': 'li', 'lin': 'ln', 'lao': 'lo', 'lit': 'lt',
    'lub': 'lu', 'lav': 'lv', 'mlg': 'mg', 'mah': 'mh', 'mri': 'mi', 'mkd': 'mk',
    'mal': 'ml', 'mon': 'mn', 'mar': 'mr', 'msa': 'ms', 'mlt': 'mt', 'mya': 'my',
    'nau': 'na', 'nob': 'nb', 'nde': 'nd', 'nep': 'ne', 'ndo': 'ng', 'nld': 'nl',
    'nno': 'nn', 'nor': 'no', 'nbl': 'nr', 'nav': 'nv', 'nya': 'ny', 'oci': 'oc',
    'oji': 'oj', 'orm': 'om', 'ori': 'or', 'oss': 'os', 'pan': 'pa', 'pli': 'pi',
    'pol': 'pl', 'pus': 'ps', 'por': 'pt', 'que': 'qu', 'roh': 'rm', 'run': 'rn',
    'ron': 'ro', 'rus': 'ru', 'kin': 'rw', 'san': 'sa', 'srd': 'sc', 'snd': 'sd',
    'sme': 'se', 'sag': 'sg', 'sin': 'si', 'slk': 'sk', 'slv': 'sl', 'smo': 'sm',
    'sna': 'sn', 'som': 'so', 'sqi': 'sq', 'srp': 'sr', 'ssw': 'ss', 'sot': 'st',
    'sun': 'su', 'swe': 'sv', 'swa': 'sw', 'tam': 'ta', 'tel': 'te', 'tgk': 'tg',
    'tha': 'th', 'tir': 'ti', 'tuk': 'tk', 'tgl': 'tl', 'tsn': 'tn', 'ton': 'to',
    'tur': 'tr', 'tso': 'ts', 'tat': 'tt', 'twi': 'tw', 'tah': 'ty', 'uig': 'ug',
    'ukr': 'uk', 'urd': 'ur', 'uzb': 'uz', 'ven': 've', 'vie': 'vi', 'vol': 'vo',
    'wln': 'wa', 'wol': 'wo', 'xho': 'xh', 'yid': 'yi', 'yor': 'yo', 'zha': 'za',
    'zho': 'zh', 'zul': 'zu',
}
83
84## Convert a python unicode string to a normal string
85#
86# Convert a python unicode string to a normal string
87# UniToStr(u'I am a string') is 'I am a string'
88#
89# @param Uni:  The python unicode string
90#
91# @retval:     The formatted normal string
92#
def UniToStr(Uni):
    """Return repr(Uni) without its string-literal decoration.

    The surrounding quotes are removed together with a leading prefix
    letter (such as the ``u`` of a Python 2 unicode literal) when repr()
    emits one.  Characters that repr() escapes stay escaped.

    @param Uni:  The python unicode string

    @retval:     The formatted normal string
    """
    Text = repr(Uni)
    # repr() starts either with the opening quote or with one prefix letter
    # followed by the quote; only skip the prefix when it is really there,
    # otherwise the first character of the text itself would be lost.
    if Text[:1] not in ('"', "'"):
        Text = Text[1:]
    return Text[1:-1]
95
96## Convert a unicode string to a Hex list
97#
98# Convert a unicode string to a Hex list
99# UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
100#
101# @param Uni:    The python unicode string
102#
103# @retval List:  The formatted hex list
104#
def UniToHexList(Uni):
    """Convert a string to a list of hex byte strings, low byte first.

    Every 16-bit unit contributes two entries: '0x' plus the low byte,
    then '0x' plus the high byte, e.g. 'A' -> ['0x41', '0x00'].  A code
    point above 0xFFFF is first split into its UTF-16 surrogate pair so
    that it still yields well-formed 16-bit units (four entries).

    @param Uni:    The python unicode string

    @retval List:  The formatted hex list
    """
    List = []
    for Item in Uni:
        Code = ord(Item)
        if Code > 0xFFFF:
            # A single 4-digit hex field cannot hold this value; emit the
            # high and low surrogates as two separate units instead.
            Code -= 0x10000
            Units = (0xD800 | (Code >> 10), 0xDC00 | (Code & 0x3FF))
        else:
            Units = (Code,)
        for Unit in Units:
            List.append('0x%02X' % (Unit & 0xFF))
            List.append('0x%02X' % (Unit >> 8))
    return List
112
113## Convert special unicode characters
114#
115# Convert special characters to (c), (r) and (tm).
116#
117# @param Uni:    The python unicode string
118#
119# @retval NewUni:  The converted unicode string
120#
def ConvertSpecialUnicodes(Uni):
    """Replace the copyright, registered and trademark signs.

    U+00A9 becomes '(c)', U+00AE becomes '(r)' and U+2122 becomes '(tm)';
    all other characters are left as they are.

    @param Uni:      The python unicode string

    @retval NewUni:  The converted unicode string
    """
    Substitutions = (
        (u'\u00A9', '(c)'),
        (u'\u00AE', '(r)'),
        (u'\u2122', '(tm)'),
    )
    Converted = Uni
    for Symbol, Replacement in Substitutions:
        Converted = Converted.replace(Symbol, Replacement)
    return Converted
127
128## GetLanguageCode1766
129#
130# Check the language code read from .UNI file and convert RFC 4646 codes to RFC 1766 codes
# RFC 1766 language codes supported in compatibility mode
132# RFC 4646 language codes supported in native mode
133#
134# @param LangName:   Language codes read from .UNI file
135#
# @retval LangName:  Valid language code in RFC 1766 format or None
137#
def GetLanguageCode1766(LangName, File=None):
    """Map a language code onto a three-letter key of gLANG_CONV_TABLE.

    Accepted shapes, decided by the length of LangName:
      2 letters         -> the table key whose value is that code
      3 letters         -> LangName itself, if it is a key of the table
      'xx-...' (5+)     -> the table key whose value is the 2-letter prefix
      'xxx-...' (6+)    -> the table key equal to the 3-letter prefix
    Everything else is reported through EdkLogger.Error as FORMAT_INVALID.

    @param LangName:   Language codes read from .UNI file
    @param File:       Passed through to EdkLogger.Error

    @retval LangName:  The matching three-letter code
    """
    def _KeyForShortCode(ShortCode):
        # First table key whose two-letter value equals ShortCode, or None.
        for Candidate in gLANG_CONV_TABLE.keys():
            if gLANG_CONV_TABLE.get(Candidate) == ShortCode:
                return Candidate
        return None

    NameLen = len(LangName)
    if NameLen == 2:
        if LangName.isalpha():
            Found = _KeyForShortCode(LangName.lower())
            if Found is not None:
                return Found
    elif NameLen == 3:
        if LangName.isalpha() and gLANG_CONV_TABLE.get(LangName.lower()):
            return LangName
        EdkLogger.Error("Unicode File Parser",
                         ToolError.FORMAT_INVALID,
                         "Invalid RFC 1766 language code : %s" % LangName,
                         File)
    elif NameLen >= 5:
        # Two-letter primary subtag followed by '-'.
        if LangName[0:2].isalpha() and LangName[2] == '-':
            Found = _KeyForShortCode(LangName[0:2].lower())
            if Found is not None:
                return Found
        # Three-letter primary subtag followed by '-'; needs index 3, so
        # only names of six or more characters are considered.
        if NameLen >= 6 and LangName[0:3].isalpha() \
           and gLANG_CONV_TABLE.get(LangName.lower()) is None and LangName[3] == '-':
            Prefix = LangName[0:3].lower()
            for Candidate in gLANG_CONV_TABLE.keys():
                if Candidate == Prefix:
                    return Candidate

    EdkLogger.Error("Unicode File Parser",
                             ToolError.FORMAT_INVALID,
                             "Invalid RFC 4646 language code : %s" % LangName,
                             File)
172
173## GetLanguageCode
174#
175# Check the language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
# RFC 1766 language codes supported in compatibility mode
177# RFC 4646 language codes supported in native mode
178#
179# @param LangName:   Language codes read from .UNI file
180#
# @retval LangName:  Valid language code in RFC 4646 format or None
182#
def GetLanguageCode(LangName, IsCompatibleMode, File):
    """Validate a language code and return it in the form used internally.

    In compatible mode only three-letter alphabetic codes are accepted; the
    result is the two-letter value from gLANG_CONV_TABLE when the code is a
    key of the table, otherwise the code unchanged.

    In native mode the following are returned unchanged:
      * names starting with 'x-' or 'X-'
      * two alphabetic letters
      * three alphabetic letters that are NOT a key of gLANG_CONV_TABLE
      * five or more characters starting with two letters and '-'
      * six or more characters starting with three letters and '-'
    Anything else, including an empty or one-character name, is reported
    through EdkLogger.Error as FORMAT_INVALID.

    @param LangName:          Language codes read from .UNI file
    @param IsCompatibleMode:  True to apply the compatible-mode rules
    @param File:              Passed through to EdkLogger.Error

    @retval LangName:         The validated (possibly converted) code
    """
    length = len(LangName)
    if IsCompatibleMode:
        if length == 3 and LangName.isalpha():
            # Table values are never None, so the default is used exactly
            # when the code has no two-letter equivalent.
            return gLANG_CONV_TABLE.get(LangName.lower(), LangName)
        else:
            EdkLogger.Error("Unicode File Parser",
                             ToolError.FORMAT_INVALID,
                             "Invalid RFC 1766 language code : %s" % LangName,
                             File)
    # Slice instead of indexing: names shorter than two characters must end
    # up in the FORMAT_INVALID report below, not in an IndexError.
    if LangName[0:2] in ('x-', 'X-'):
        return LangName
    if length == 2:
        if LangName.isalpha():
            return LangName
    elif length == 3:
        if LangName.isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None:
            return LangName
    elif length == 5:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
    elif length >= 6:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
        # NOTE(review): the table is looked up with the whole name, which is
        # at least six characters here; confirm whether the three-letter
        # prefix was meant. Kept as-is to preserve the accepted inputs.
        if LangName[0:3].isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None and LangName[3] == '-':
            return LangName

    EdkLogger.Error("Unicode File Parser",
                             ToolError.FORMAT_INVALID,
                             "Invalid RFC 4646 language code : %s" % LangName,
                             File)
217
218## FormatUniEntry
219#
# Format the entry in a Uni file.
221#
222# @param StrTokenName    StrTokenName.
223# @param TokenValueList  A list need to be processed.
224# @param ContainerFile   ContainerFile.
225#
# @return formatted entry
def FormatUniEntry(StrTokenName, TokenValueList, ContainerFile):
    """Render one string token with all its language values as .uni text.

    The token name occupies a column of 40 characters (or its own length
    plus one when it is longer than 40).  Each (Lang, Value) pair with a
    non-empty Value adds a '#language <code>' field followed by the value,
    one quoted line per non-blank line of Value.  Pairs whose language is
    DT.TAB_LANGUAGE_EN_X are skipped.

    @param StrTokenName    Name written at the start of the entry.
    @param TokenValueList  Iterable of (Lang, Value) pairs.
    @param ContainerFile   Passed to GetLanguageCode for error reporting.

    @return the formatted entry, or '' when no pair produced any output
    """
    LangFieldWidth = len('#language en-US ')
    NameFieldWidth = 40
    if len(StrTokenName) > NameFieldWidth:
        NameFieldWidth = len(StrTokenName) + 1
    ValueIndent = ' ' * (NameFieldWidth + LangFieldWidth)

    Pieces = []
    for (Lang, Value) in TokenValueList:
        if not Value or Lang == DT.TAB_LANGUAGE_EN_X:
            continue
        if Lang == '':
            Lang = DT.TAB_LANGUAGE_EN_US
        if Lang == 'eng':
            Lang = DT.TAB_LANGUAGE_EN_US
        else:
            PrimaryTag = Lang.split('-')[0]
            if len(PrimaryTag) == 3:
                Lang = GetLanguageCode(PrimaryTag, True, ContainerFile)
            else:
                Lang = GetLanguageCode(Lang, False, ContainerFile)

        # One indented, quoted line per non-blank line of the value.
        Quoted = ''.join([ValueIndent + '\"%s\\n\"' % Part.strip() + '\r\n'
                          for Part in Value.split('\n') if Part.strip()])
        # Drop the indent of the first line (the '#language' field takes its
        # place) and the trailing '\n' escape of the last line.
        Quoted = Quoted[len(ValueIndent):Quoted.rfind('\\n')] + '\"' + '\r\n'
        Pieces.append(' ' * NameFieldWidth + '#language %-5s ' % Lang + Quoted)

    SubContent = ''.join(Pieces)
    if SubContent:
        # The first line starts with the token name instead of padding.
        SubContent = StrTokenName + ' ' * (NameFieldWidth - len(StrTokenName)) + SubContent[NameFieldWidth:]
    return SubContent
255
256
257## StringDefClassObject
258#
259# A structure for language definition
260#
class StringDefClassObject(object):
    """One string definition: its name, value, token and the hex-byte forms
    of name and value as produced by UniToHexList."""

    def __init__(self, Name = None, Value = None, Referenced = False, Token = None, UseOtherLangDef = ''):
        # Defaults used for every argument that is left as None.
        self.StringName = ''
        self.StringNameByteList = []
        self.StringValue = ''
        self.StringValueByteList = ''
        self.Length = 0
        self.Token = 0
        self.Referenced = Referenced
        self.UseOtherLangDef = UseOtherLangDef

        if Name is not None:
            self.StringName = Name
            self.StringNameByteList = UniToHexList(Name)
        if Value is not None:
            self._StoreValue(Value)
        if Token is not None:
            self.Token = Token

    def _StoreValue(self, NewValue):
        # Set the value and keep the byte list and Length in step with it.
        self.StringValue = NewValue
        self.StringValueByteList = UniToHexList(self.StringValue)
        self.Length = len(self.StringValueByteList)

    def __str__(self):
        Fields = (self.StringName, self.Token, self.Referenced, self.StringValue, self.UseOtherLangDef)
        return ' '.join([repr(Field) for Field in Fields])

    def UpdateValue(self, Value = None):
        # Append Value on a new line (CR LF) when a value already exists,
        # otherwise take it as the value; None leaves the object unchanged.
        if Value is None:
            return
        if self.StringValue:
            Value = self.StringValue + '\r\n' + Value
        self._StoreValue(Value)
297
298## UniFileClassObject
299#
300# A structure for .uni file definition
301#
302class UniFileClassObject(object):
303    def __init__(self, FileList = None, IsCompatibleMode = False, IncludePathList = None):
304        self.FileList = FileList
305        self.File = None
306        self.IncFileList = FileList
307        self.UniFileHeader = ''
308        self.Token = 2
309        self.LanguageDef = []                   #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
310        self.OrderedStringList = {}             #{ u'LanguageIdentifier' : [StringDefClassObject]  }
311        self.OrderedStringDict = {}             #{ u'LanguageIdentifier' : {StringName:(IndexInList)}  }
312        self.OrderedStringListByToken = {}      #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
313        self.IsCompatibleMode = IsCompatibleMode
314        if not IncludePathList:
315            self.IncludePathList = []
316        else:
317            self.IncludePathList = IncludePathList
318        if len(self.FileList) > 0:
319            self.LoadUniFiles(FileList)
320
321    #
322    # Get Language definition
323    #
324    def GetLangDef(self, File, Line):
325        Lang = distutils.util.split_quoted((Line.split(u"//")[0]))
326        if len(Lang) != 3:
327            try:
328                FileIn = codecs.open(File.Path, mode='rb', encoding='utf_16').read()
329            except UnicodeError, Xstr:
330                FileIn = codecs.open(File.Path, mode='rb', encoding='utf_16_le').read()
331            except:
332                EdkLogger.Error("Unicode File Parser",
333                                ToolError.FILE_OPEN_FAILURE,
334                                "File read failure: %s" % str(Xstr),
335                                ExtraData=File)
336            LineNo = GetLineNo(FileIn, Line, False)
337            EdkLogger.Error("Unicode File Parser",
338                             ToolError.PARSER_ERROR,
339                             "Wrong language definition",
340                             ExtraData="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line,
341                             File = File, Line = LineNo)
342        else:
343            LangName = GetLanguageCode(Lang[1], self.IsCompatibleMode, self.File)
344            LangPrintName = Lang[2]
345
346        IsLangInDef = False
347        for Item in self.LanguageDef:
348            if Item[0] == LangName:
349                IsLangInDef = True
350                break
351
352        if not IsLangInDef:
353            self.LanguageDef.append([LangName, LangPrintName])
354
355        #
356        # Add language string
357        #
358        self.AddStringToList(u'$LANGUAGE_NAME', LangName, LangName, 0, True, Index=0)
359        self.AddStringToList(u'$PRINTABLE_LANGUAGE_NAME', LangName, LangPrintName, 1, True, Index=1)
360
361        if not IsLangInDef:
362            #
363            # The found STRING tokens will be added into new language string list
364            # so that the unique STRING identifier is reserved for all languages in the package list.
365            #
366            FirstLangName = self.LanguageDef[0][0]
367            if LangName != FirstLangName:
368                for Index in range (2, len (self.OrderedStringList[FirstLangName])):
369                    Item = self.OrderedStringList[FirstLangName][Index]
370                    if Item.UseOtherLangDef != '':
371                        OtherLang = Item.UseOtherLangDef
372                    else:
373                        OtherLang = FirstLangName
374                    self.OrderedStringList[LangName].append (StringDefClassObject(Item.StringName,
375                                                                                  '',
376                                                                                  Item.Referenced,
377                                                                                  Item.Token,
378                                                                                  OtherLang))
379                    self.OrderedStringDict[LangName][Item.StringName] = len(self.OrderedStringList[LangName]) - 1
380        return True
381
382    #
383    # Get String name and value
384    #
385    def GetStringObject(self, Item):
386        Language = ''
387        Value = ''
388
389        Name = Item.split()[1]
390        # Check the string name is the upper character
391        if Name != '':
392            MatchString = re.match('[A-Z0-9_]+', Name, re.UNICODE)
393            if MatchString == None or MatchString.end(0) != len(Name):
394                EdkLogger.Error("Unicode File Parser",
395                             ToolError.FORMAT_INVALID,
396                             'The string token name %s in UNI file %s must be upper case character.' %(Name, self.File))
397        LanguageList = Item.split(u'#language ')
398        for IndexI in range(len(LanguageList)):
399            if IndexI == 0:
400                continue
401            else:
402                Language = LanguageList[IndexI].split()[0]
403                #.replace(u'\r\n', u'')
404                Value = \
405                LanguageList[IndexI][LanguageList[IndexI].find(u'\"') + len(u'\"') : LanguageList[IndexI].rfind(u'\"')]
406                Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
407                self.AddStringToList(Name, Language, Value)
408
409    #
410    # Get include file list and load them
411    #
412    def GetIncludeFile(self, Item, Dir = None):
413        if Dir:
414            pass
415        FileName = Item[Item.find(u'!include ') + len(u'!include ') :Item.find(u' ', len(u'!include '))][1:-1]
416        self.LoadUniFile(FileName)
417
418    #
419    # Pre-process before parse .uni file
420    #
421    def PreProcess(self, File, IsIncludeFile=False):
422        if not os.path.exists(File.Path) or not os.path.isfile(File.Path):
423            EdkLogger.Error("Unicode File Parser",
424                             ToolError.FILE_NOT_FOUND,
425                             ExtraData=File.Path)
426
427        try:
428            FileIn = codecs.open(File.Path, mode='rb', encoding='utf_16').readlines()
429        except UnicodeError:
430            FileIn = codecs.open(File.Path, mode='rb', encoding='utf_16_le').readlines()
431        except:
432            EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=File.Path)
433
434
435        #
436        # get the file header
437        #
438        Lines = []
439        HeaderStart = False
440        HeaderEnd = False
441        if not self.UniFileHeader:
442            FirstGenHeader = True
443        else:
444            FirstGenHeader = False
445        for Line in FileIn:
446            Line = Line.strip()
447            if Line == u'':
448                continue
449            if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and (Line.find(DT.TAB_HEADER_COMMENT) > -1) \
450                and not HeaderEnd and not HeaderStart:
451                HeaderStart = True
452            if not Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd:
453                HeaderEnd = True
454            if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd and FirstGenHeader:
455                self.UniFileHeader += Line + '\r\n'
456                continue
457
458        #
459        # Use unique identifier
460        #
461        FindFlag = -1
462        LineCount = 0
463        MultiLineFeedExits = False
464        #
465        # 0: initial value
466        # 1: signle String entry exist
467        # 2: line feed exist under the some signle String entry
468        #
469        StringEntryExistsFlag = 0
470        for Line in FileIn:
471            Line = FileIn[LineCount]
472            LineCount += 1
473            Line = Line.strip()
474            #
475            # Ignore comment line and empty line
476            #
477            if Line == u'' or Line.startswith(u'//'):
478                #
479                # Change the single line String entry flag status
480                #
481                if StringEntryExistsFlag == 1:
482                    StringEntryExistsFlag = 2
483                #
484                # If the '#string' line and the '#language' line are not in the same line,
485                # there should be only one line feed character betwwen them
486                #
487                if MultiLineFeedExits:
488                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
489                continue
490
491            MultiLineFeedExits = False
492            #
493            # Process comment embeded in string define lines
494            #
495            FindFlag = Line.find(u'//')
496            if FindFlag != -1 and Line.find(u'//') < Line.find(u'"'):
497                Line = Line.replace(Line[FindFlag:], u' ')
498                if FileIn[LineCount].strip().startswith('#language'):
499                    Line = Line + FileIn[LineCount]
500                    FileIn[LineCount-1] = Line
501                    FileIn[LineCount] = '\r\n'
502                    LineCount -= 1
503                    for Index in xrange (LineCount + 1, len (FileIn) - 1):
504                        if (Index == len(FileIn) -1):
505                            FileIn[Index] = '\r\n'
506                        else:
507                            FileIn[Index] = FileIn[Index + 1]
508                    continue
509            CommIndex = GetCharIndexOutStr(u'/', Line)
510            if CommIndex > -1:
511                if (len(Line) - 1) > CommIndex:
512                    if Line[CommIndex+1] == u'/':
513                        Line = Line[:CommIndex].strip()
514                    else:
515                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
516                else:
517                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
518
519            Line = Line.replace(UNICODE_WIDE_CHAR, WIDE_CHAR)
520            Line = Line.replace(UNICODE_NARROW_CHAR, NARROW_CHAR)
521            Line = Line.replace(UNICODE_NON_BREAKING_CHAR, NON_BREAKING_CHAR)
522
523            Line = Line.replace(u'\\\\', u'\u0006')
524            Line = Line.replace(u'\\r\\n', CR + LF)
525            Line = Line.replace(u'\\n', CR + LF)
526            Line = Line.replace(u'\\r', CR)
527            Line = Line.replace(u'\\t', u'\t')
528            Line = Line.replace(u'''\"''', u'''"''')
529            Line = Line.replace(u'\t', u' ')
530            Line = Line.replace(u'\u0006', u'\\')
531
532            IncList = gINCLUDE_PATTERN.findall(Line)
533            if len(IncList) == 1:
534                for Dir in [File.Dir] + self.IncludePathList:
535                    IncFile = PathClass(str(IncList[0]), Dir)
536                    self.IncFileList.append(IncFile)
537                    if os.path.isfile(IncFile.Path):
538                        Lines.extend(self.PreProcess(IncFile, True))
539                        break
540                else:
541                    EdkLogger.Error("Unicode File Parser",
542                                    ToolError.FILE_NOT_FOUND,
543                                    Message="Cannot find include file",
544                                    ExtraData=str(IncList[0]))
545                continue
546
547            #
548            # Between Name entry and Language entry can not contain line feed
549            #
550            if Line.startswith(u'#string') and Line.find(u'#language') == -1:
551                MultiLineFeedExits = True
552
553            if Line.startswith(u'#string') and Line.find(u'#language') > 0 and Line.find(u'"') < 0:
554                MultiLineFeedExits = True
555
556            #
557            # Between Language entry and String entry can not contain line feed
558            #
559            if Line.startswith(u'#language') and len(Line.split()) == 2:
560                MultiLineFeedExits = True
561
562            #
563            # Between two String entry, can not contain line feed
564            #
565            if Line.startswith(u'"'):
566                if StringEntryExistsFlag == 2:
567                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
568                                    Message=ST.ERR_UNIPARSE_LINEFEED_UP_EXIST % Line, ExtraData=File.Path)
569
570                StringEntryExistsFlag = 1
571                if not Line.endswith('"'):
572                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
573            elif Line.startswith(u'#language'):
574                if StringEntryExistsFlag == 2:
575                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
576                                    Message=ST.ERR_UNIPARSE_LINEFEED_UP_EXIST % Line, ExtraData=File.Path)
577                StringEntryExistsFlag = 0
578            else:
579                StringEntryExistsFlag = 0
580
581            Lines.append(Line)
582
583        #
584        # Convert string def format as below
585        #
586        #     #string MY_STRING_1
587        #     #language eng
588        #     "My first English string line 1"
589        #     "My first English string line 2"
590        #     #string MY_STRING_1
591        #     #language spa
592        #     "Mi segunda secuencia 1"
593        #     "Mi segunda secuencia 2"
594        #
595
596        if not IsIncludeFile and not Lines:
597            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
598                Message=ST.ERR_UNIPARSE_NO_SECTION_EXIST, \
599                ExtraData=File.Path)
600
601        NewLines = []
602        StrName = u''
603        ExistStrNameList = []
604        for Line in Lines:
605            if StrName and not StrName.split()[1].startswith(DT.TAB_STR_TOKENCNAME + DT.TAB_UNDERLINE_SPLIT):
606                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
607                                Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \
608                                ExtraData=File.Path)
609
610            if StrName and len(StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)) == 4:
611                StringTokenList = StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)
612                if (StringTokenList[3].upper() in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP] and \
613                    StringTokenList[3] not in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP]) or \
614                    (StringTokenList[2].upper() == DT.TAB_STR_TOKENERR and StringTokenList[2] != DT.TAB_STR_TOKENERR):
615                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
616                                Message=ST.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR % StrName.split()[1], \
617                                ExtraData=File.Path)
618
619            if Line.count(u'#language') > 1:
620                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
621                                Message=ST.ERR_UNIPARSE_SEP_LANGENTRY_LINE % Line, \
622                                ExtraData=File.Path)
623
624            if Line.startswith(u'//'):
625                continue
626            elif Line.startswith(u'#langdef'):
627                if len(Line.split()) == 2:
628                    NewLines.append(Line)
629                    continue
630                elif len(Line.split()) > 2 and Line.find(u'"') > 0:
631                    NewLines.append(Line[:Line.find(u'"')].strip())
632                    NewLines.append(Line[Line.find(u'"'):])
633                else:
634                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
635            elif Line.startswith(u'#string'):
636                if len(Line.split()) == 2:
637                    StrName = Line
638                    if StrName:
639                        if StrName.split()[1] not in ExistStrNameList:
640                            ExistStrNameList.append(StrName.split()[1].strip())
641                        elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
642                                                    DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
643                                                    DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
644                                                    DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
645                            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
646                                            Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
647                                            ExtraData=File.Path)
648                    continue
649                elif len(Line.split()) == 4 and Line.find(u'#language') > 0:
650                    if Line[Line.find(u'#language')-1] != ' ' or \
651                       Line[Line.find(u'#language')+len(u'#language')] != u' ':
652                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
653
654                    if Line.find(u'"') > 0:
655                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
656
657                    StrName = Line.split()[0] + u' ' + Line.split()[1]
658                    if StrName:
659                        if StrName.split()[1] not in ExistStrNameList:
660                            ExistStrNameList.append(StrName.split()[1].strip())
661                        elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
662                                                    DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
663                                                    DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
664                                                    DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
665                            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
666                                            Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
667                                            ExtraData=File.Path)
668                    if IsIncludeFile:
669                        if StrName not in NewLines:
670                            NewLines.append((Line[:Line.find(u'#language')]).strip())
671                    else:
672                        NewLines.append((Line[:Line.find(u'#language')]).strip())
673                    NewLines.append((Line[Line.find(u'#language'):]).strip())
674                elif len(Line.split()) > 4 and Line.find(u'#language') > 0 and Line.find(u'"') > 0:
675                    if Line[Line.find(u'#language')-1] != u' ' or \
676                       Line[Line.find(u'#language')+len(u'#language')] != u' ':
677                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
678
679                    if Line[Line.find(u'"')-1] != u' ':
680                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
681
682                    StrName = Line.split()[0] + u' ' + Line.split()[1]
683                    if StrName:
684                        if StrName.split()[1] not in ExistStrNameList:
685                            ExistStrNameList.append(StrName.split()[1].strip())
686                        elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
687                                                    DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
688                                                    DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
689                                                    DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
690                            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
691                                            Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
692                                            ExtraData=File.Path)
693                    if IsIncludeFile:
694                        if StrName not in NewLines:
695                            NewLines.append((Line[:Line.find(u'#language')]).strip())
696                    else:
697                        NewLines.append((Line[:Line.find(u'#language')]).strip())
698                    NewLines.append((Line[Line.find(u'#language'):Line.find(u'"')]).strip())
699                    NewLines.append((Line[Line.find(u'"'):]).strip())
700                else:
701                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
702            elif Line.startswith(u'#language'):
703                if len(Line.split()) == 2:
704                    if IsIncludeFile:
705                        if StrName not in NewLines:
706                            NewLines.append(StrName)
707                    else:
708                        NewLines.append(StrName)
709                    NewLines.append(Line)
710                elif len(Line.split()) > 2 and Line.find(u'"') > 0:
711                    if IsIncludeFile:
712                        if StrName not in NewLines:
713                            NewLines.append(StrName)
714                    else:
715                        NewLines.append(StrName)
716                    NewLines.append((Line[:Line.find(u'"')]).strip())
717                    NewLines.append((Line[Line.find(u'"'):]).strip())
718                else:
719                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
720            elif Line.startswith(u'"'):
721                if u'#string' in Line  or u'#language' in Line:
722                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
723                NewLines.append(Line)
724            else:
725                print Line
726                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
727
728        if StrName and not StrName.split()[1].startswith(u'STR_'):
729            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
730                                Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \
731                                ExtraData=File.Path)
732
733        if StrName and not NewLines:
734            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
735                            Message=ST.ERR_UNI_MISS_LANGENTRY % StrName, \
736                            ExtraData=File.Path)
737
738        #
        # Check Abstract, Description, BinaryAbstract and BinaryDescription order,
        # should be Abstract, Description, BinaryAbstract, BinaryDescription
741        AbstractPosition = -1
742        DescriptionPosition = -1
743        BinaryAbstractPosition = -1
744        BinaryDescriptionPosition = -1
745        for StrName in ExistStrNameList:
746            if DT.TAB_HEADER_ABSTRACT.upper() in StrName:
747                if 'BINARY' in StrName:
748                    BinaryAbstractPosition = ExistStrNameList.index(StrName)
749                else:
750                    AbstractPosition = ExistStrNameList.index(StrName)
751            if DT.TAB_HEADER_DESCRIPTION.upper() in StrName:
752                if 'BINARY' in StrName:
753                    BinaryDescriptionPosition = ExistStrNameList.index(StrName)
754                else:
755                    DescriptionPosition = ExistStrNameList.index(StrName)
756
757        OrderList = sorted([AbstractPosition, DescriptionPosition])
758        BinaryOrderList = sorted([BinaryAbstractPosition, BinaryDescriptionPosition])
759        Min = OrderList[0]
760        Max = OrderList[1]
761        BinaryMin = BinaryOrderList[0]
762        BinaryMax = BinaryOrderList[1]
763        if BinaryDescriptionPosition > -1:
764            if not(BinaryDescriptionPosition == BinaryMax and BinaryAbstractPosition == BinaryMin and \
765                   BinaryMax > Max):
766                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
767                                Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
768                                ExtraData=File.Path)
769        elif BinaryAbstractPosition > -1:
770            if not(BinaryAbstractPosition > Max):
771                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
772                                Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
773                                ExtraData=File.Path)
774
775        if  DescriptionPosition > -1:
776            if not(DescriptionPosition == Max and AbstractPosition == Min and \
777                   DescriptionPosition > AbstractPosition):
778                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
779                                Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
780                                ExtraData=File.Path)
781
782        if not self.UniFileHeader:
783            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
784                            Message = ST.ERR_NO_SOURCE_HEADER,
785                            ExtraData=File.Path)
786
787        return NewLines
788
789    #
790    # Load a .uni file
791    #
792    def LoadUniFile(self, File = None):
793        if File == None:
794            EdkLogger.Error("Unicode File Parser",
795                            ToolError.PARSER_ERROR,
796                            Message='No unicode file is given',
797                            ExtraData=File.Path)
798
799        self.File = File
800
801        #
802        # Process special char in file
803        #
804        Lines = self.PreProcess(File)
805
806        #
807        # Get Unicode Information
808        #
809        for IndexI in range(len(Lines)):
810            Line = Lines[IndexI]
811            if (IndexI + 1) < len(Lines):
812                SecondLine = Lines[IndexI + 1]
813            if (IndexI + 2) < len(Lines):
814                ThirdLine = Lines[IndexI + 2]
815
816            #
817            # Get Language def information
818            #
819            if Line.find(u'#langdef ') >= 0:
820                self.GetLangDef(File, Line + u' ' + SecondLine)
821                continue
822
823            Name = ''
824            Language = ''
825            Value = ''
826            CombineToken = False
827            #
828            # Get string def information format as below
829            #
830            #     #string MY_STRING_1
831            #     #language eng
832            #     "My first English string line 1"
833            #     "My first English string line 2"
834            #     #string MY_STRING_1
835            #     #language spa
836            #     "Mi segunda secuencia 1"
837            #     "Mi segunda secuencia 2"
838            #
839            if Line.find(u'#string ') >= 0 and Line.find(u'#language ') < 0 and \
840                SecondLine.find(u'#string ') < 0 and SecondLine.find(u'#language ') >= 0 and \
841                ThirdLine.find(u'#string ') < 0 and ThirdLine.find(u'#language ') < 0:
842                if Line.find('"') > 0 or SecondLine.find('"') > 0:
843                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
844                                Message=ST.ERR_UNIPARSE_DBLQUOTE_UNMATCHED,
845                                ExtraData=File.Path)
846
847                Name = Line[Line.find(u'#string ') + len(u'#string ') : ].strip(' ')
848                Language = SecondLine[SecondLine.find(u'#language ') + len(u'#language ') : ].strip(' ')
849                for IndexJ in range(IndexI + 2, len(Lines)):
850                    if Lines[IndexJ].find(u'#string ') < 0 and Lines[IndexJ].find(u'#language ') < 0 and \
851                    Lines[IndexJ].strip().startswith(u'"') and Lines[IndexJ].strip().endswith(u'"'):
852                        if Lines[IndexJ][-2] == ' ':
853                            CombineToken = True
854                        if CombineToken:
855                            if Lines[IndexJ].strip()[1:-1].strip():
856                                Value = Value + Lines[IndexJ].strip()[1:-1].rstrip() + ' '
857                            else:
858                                Value = Value + Lines[IndexJ].strip()[1:-1]
859                            CombineToken = False
860                        else:
861                            Value = Value + Lines[IndexJ].strip()[1:-1] + '\r\n'
862                    else:
863                        IndexI = IndexJ
864                        break
865                if Value.endswith('\r\n'):
866                    Value = Value[: Value.rfind('\r\n')]
867                Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
868                self.AddStringToList(Name, Language, Value)
869                continue
870
871    #
872    # Load multiple .uni files
873    #
874    def LoadUniFiles(self, FileList):
875        if len(FileList) > 0:
876            for File in FileList:
877                FilePath = File.Path.strip()
878                if FilePath.endswith('.uni') or FilePath.endswith('.UNI') or FilePath.endswith('.Uni'):
879                    self.LoadUniFile(File)
880
881    #
882    # Add a string to list
883    #
884    def AddStringToList(self, Name, Language, Value, Token = 0, Referenced = False, UseOtherLangDef = '', Index = -1):
885        for LangNameItem in self.LanguageDef:
886            if Language == LangNameItem[0]:
887                break
888
889        if Language not in self.OrderedStringList:
890            self.OrderedStringList[Language] = []
891            self.OrderedStringDict[Language] = {}
892
893        IsAdded = True
894        if Name in self.OrderedStringDict[Language]:
895            IsAdded = False
896            if Value != None:
897                ItemIndexInList = self.OrderedStringDict[Language][Name]
898                Item = self.OrderedStringList[Language][ItemIndexInList]
899                Item.UpdateValue(Value)
900                Item.UseOtherLangDef = ''
901
902        if IsAdded:
903            Token = len(self.OrderedStringList[Language])
904            if Index == -1:
905                self.OrderedStringList[Language].append(StringDefClassObject(Name,
906                                                                             Value,
907                                                                             Referenced,
908                                                                             Token,
909                                                                             UseOtherLangDef))
910                self.OrderedStringDict[Language][Name] = Token
911                for LangName in self.LanguageDef:
912                    #
913                    # New STRING token will be added into all language string lists.
914                    # so that the unique STRING identifier is reserved for all languages in the package list.
915                    #
916                    if LangName[0] != Language:
917                        if UseOtherLangDef != '':
918                            OtherLangDef = UseOtherLangDef
919                        else:
920                            OtherLangDef = Language
921                        self.OrderedStringList[LangName[0]].append(StringDefClassObject(Name,
922                                                                                        '',
923                                                                                        Referenced,
924                                                                                        Token,
925                                                                                        OtherLangDef))
926                        self.OrderedStringDict[LangName[0]][Name] = len(self.OrderedStringList[LangName[0]]) - 1
927            else:
928                self.OrderedStringList[Language].insert(Index, StringDefClassObject(Name,
929                                                                                    Value,
930                                                                                    Referenced,
931                                                                                    Token,
932                                                                                    UseOtherLangDef))
933                self.OrderedStringDict[Language][Name] = Index
934
935    #
936    # Set the string as referenced
937    #
938    def SetStringReferenced(self, Name):
939        #
940        # String stoken are added in the same order in all language string lists.
941        # So, only update the status of string stoken in first language string list.
942        #
943        Lang = self.LanguageDef[0][0]
944        if Name in self.OrderedStringDict[Lang]:
945            ItemIndexInList = self.OrderedStringDict[Lang][Name]
946            Item = self.OrderedStringList[Lang][ItemIndexInList]
947            Item.Referenced = True
948
949    #
950    # Search the string in language definition by Name
951    #
952    def FindStringValue(self, Name, Lang):
953        if Name in self.OrderedStringDict[Lang]:
954            ItemIndexInList = self.OrderedStringDict[Lang][Name]
955            return self.OrderedStringList[Lang][ItemIndexInList]
956
957        return None
958
959    #
960    # Search the string in language definition by Token
961    #
962    def FindByToken(self, Token, Lang):
963        for Item in self.OrderedStringList[Lang]:
964            if Item.Token == Token:
965                return Item
966
967        return None
968
969    #
970    # Re-order strings and re-generate tokens
971    #
972    def ReToken(self):
973        if len(self.LanguageDef) == 0:
974            return None
975        #
976        # Retoken all language strings according to the status of string stoken in the first language string.
977        #
978        FirstLangName = self.LanguageDef[0][0]
979
980        # Convert the OrderedStringList to be OrderedStringListByToken in order to faciliate future search by token
981        for LangNameItem in self.LanguageDef:
982            self.OrderedStringListByToken[LangNameItem[0]] = {}
983
984        #
985        # Use small token for all referred string stoken.
986        #
987        RefToken = 0
988        for Index in range (0, len (self.OrderedStringList[FirstLangName])):
989            FirstLangItem = self.OrderedStringList[FirstLangName][Index]
990            if FirstLangItem.Referenced == True:
991                for LangNameItem in self.LanguageDef:
992                    LangName = LangNameItem[0]
993                    OtherLangItem = self.OrderedStringList[LangName][Index]
994                    OtherLangItem.Referenced = True
995                    OtherLangItem.Token = RefToken
996                    self.OrderedStringListByToken[LangName][OtherLangItem.Token] = OtherLangItem
997                RefToken = RefToken + 1
998
999        #
1000        # Use big token for all unreferred string stoken.
1001        #
1002        UnRefToken = 0
1003        for Index in range (0, len (self.OrderedStringList[FirstLangName])):
1004            FirstLangItem = self.OrderedStringList[FirstLangName][Index]
1005            if FirstLangItem.Referenced == False:
1006                for LangNameItem in self.LanguageDef:
1007                    LangName = LangNameItem[0]
1008                    OtherLangItem = self.OrderedStringList[LangName][Index]
1009                    OtherLangItem.Token = RefToken + UnRefToken
1010                    self.OrderedStringListByToken[LangName][OtherLangItem.Token] = OtherLangItem
1011                UnRefToken = UnRefToken + 1
1012
1013    #
1014    # Show the instance itself
1015    #
1016    def ShowMe(self):
1017        print self.LanguageDef
1018        #print self.OrderedStringList
1019        for Item in self.OrderedStringList:
1020            print Item
1021            for Member in self.OrderedStringList[Item]:
1022                print str(Member)
1023
1024    #
1025    # Read content from '!include' UNI file
1026    #
1027    def ReadIncludeUNIfile(self, FilaPath):
1028        if self.File:
1029            pass
1030
1031        if not os.path.exists(FilaPath) or not os.path.isfile(FilaPath):
1032            EdkLogger.Error("Unicode File Parser",
1033                             ToolError.FILE_NOT_FOUND,
1034                             ExtraData=FilaPath)
1035        try:
1036            FileIn = codecs.open(FilaPath, mode='rb', encoding='utf_16').readlines()
1037        except UnicodeError:
1038            FileIn = codecs.open(FilaPath, mode='rb', encoding='utf_16_le').readlines()
1039        except:
1040            EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=FilaPath)
1041        return FileIn
1042
1043