You can add spelling aid to your Microsoft Windows application by using the DICTGET components in the English Dictionary package. The package includes dictionaries called "us1.dic", "usuk.dic", and "medical.dic" and a dynamic link library (azdict.dll) that provides the interface to these dictionaries. The US1 dictionary contains United States vocabulary; the USUK dictionary combines United Kingdom and American spellings. Each of these dictionaries has approximately 101,000 words. The medical dictionary has approximately 66,000 words.
It is recommended that you install DICTGET to familiarize yourself with the lookup and spelling aid functions. After you have coded your application, you can distribute azdict.dll and one or more dictionaries with your own package. The description below makes it possible to use the dictionaries for grammatical analysis, interactive educational packages, etc. The programs and dictionaries can be used free for research and individual use. Commercial use requires licensing through A. Zamora (Please use the e-mail address listed in the HOME PAGE). Commercial licenses may include consultation and maintenance.
The description below shows how to use the dynamic link library AZDICT.DLL from Visual Basic and C-Language applications.
The AZDICT.DLL supports six functions for managing access to the dictionaries. A 1200-byte area must be provided by the calling program for each dictionary that is used. AZDICT.DLL uses this area as a control block to keep pointers, switches, and other information needed during processing.
The six functions are:
/*
 * AZDICT.DLL interface.
 *
 * Every function takes `cb`, the 1200-byte control block that the caller
 * provides for each dictionary and that azdict.dll uses internally.
 */

/*
 * azinit - initialize a dictionary and allocate its dynamic areas.
 *   cb       control block used by azdict.dll (1200 bytes)
 *   dicname  dictionary path name (63-byte maximum)
 * Returns 0 for normal completion, >0 otherwise.
 */
int azinit(char *cb, char *dicname);

/*
 * azlook - look up a word in the dictionary.
 *   cb        control block used by azdict.dll (1200 bytes)
 *   word      word to be looked up (40 bytes)
 *   wordinfo  information, if any, returned for the word (80 bytes)
 * Returns the number of paradigms found, 0 otherwise.  255 is returned if
 * only word information is found; word information is used for
 * contractions such as "can't" that don't have paradigms.
 */
int azlook(char *cb, char *word, char *wordinfo);

/*
 * azaid - provide spelling aid for a misspelling.
 *   cb      control block used by azdict.dll (1200 bytes)
 *   word    word for which spelling candidates should be provided (40 bytes)
 *   aidct   receives the number of candidates returned (zero to six)
 *   aidlst  array of six 42-byte areas for candidates; in each area the
 *           first two bytes are the distance measure (binary) and the
 *           remaining 40 bytes hold the aid candidate
 * Returns 0 for normal execution, >0 otherwise.
 */
int azaid(char *cb, char *word, int *aidct, char *aidlst);

/*
 * azpdg - get one paradigm for a word.
 *   cb      control block used by azdict.dll (1200 bytes)
 *   npdg    number of the paradigm to be expanded
 *   nforms  receives the number of paradigm forms returned
 *   typc    receives the type code of the paradigm returned (6 bytes)
 *   forms   9*60 array that receives the returned paradigm
 */
int azpdg(char *cb, int npdg, int *nforms, char *typc, char *forms);

/*
 * azpdg2 - get all paradigm information for a word.
 *   cb      control block used by azdict.dll (1200 bytes)
 *   pdgdta  pointer to the output data structure (see wordinfo.h)
 * NOTE: azlook must be called before calling azpdg2.
 */
int azpdg2(char *cb, char *pdgdta);

/*
 * azterm - terminate processing and free all dynamic areas.
 *   cb  control block used by azdict.dll (1200 bytes)
 * Returns 0 for normal execution, >0 otherwise.
 */
int azterm(char *cb);
/* WORDINFO.H */
/*
 * Output structure filled in by azpdg2: the word that was looked up, followed
 * by up to MAXPDGMS paradigms, each holding up to MAXFORM word forms, each
 * with up to MAXMORPH grammatical attribute masks.
 *
 * The include guard lets this header be included more than once in a
 * translation unit without redefining struct wordinfo.
 */
#ifndef WORDINFO_H
#define WORDINFO_H

#define MAXPDGMS 9  /* Maximum paradigms for one word */
#define MAXFORM  9  /* Maximum word-forms per paradigm */
#define MAXMORPH 3  /* Maximum morphology attributes per word form */

struct wordinfo {
    char wd[40];                  /* word looked up (null terminated string) */
    char pdgmct;                  /* number of paradigms for the word */
    struct {                      /* paradigm array for the word */
        char typcod[6];           /* paradigm type code */
        char formct;              /* number of word forms for current paradigm */
        struct {                  /* word-form array for paradigm */
            char entry[60];       /* word-form entry (null-terminated string) */
            char attrct;          /* number of attribute masks for word-form entry */
            struct {
                /* grammatical attribute code; NOTE(review): plain char may be
                   signed, so convert each byte to unsigned char before
                   combining the three bytes into a mask value */
                char attrmask[3];
            } wdattr[MAXMORPH];   /* attribute code entry for word form */
        } wdform[MAXFORM];        /* end word-form array for paradigm */
    } wdpdgm[MAXPDGMS];           /* end paradigm array for the word */
};                                /* end wordinfo */

#endif /* WORDINFO_H */
0x008000 | Infinitive |
0x004000 | Present |
0x002000 | Past |
0x001000 | Present participle |
0x000800 | Past participle |
0x000400 | First person |
0x000200 | Second person |
0x000100 | Third person |
0x000080 | Singular |
0x000040 | Plural |
0x000020 | Positive |
0x000010 | Comparative |
0x000008 | Superlative |
0x000004 | Possessive |
0x000002 | Masculine |
0x000001 | Feminine |
The last character of the type code is the part of speech. For FN, the "N" indicates that it is a noun.
V | Verb |
DV | Defective verb |
MV | Modal verb |
N | Noun |
MN | Masculine noun |
FN | Feminine noun |
PN | Plural noun |
PMN | Plural masculine noun |
PFN | Plural feminine noun |
J | Adjective |
UJ | Uninflected adjective |
A | Adverb |
UA | Uninflected adverb |
R | Preposition |
I | Interjection |
ART | Article |
P | Pronoun |
USP | 1st person singular pronoun |
DSP | 2nd person singular pronoun |
DPP | 2nd person plural pronoun |
TMSP | 3rd person masc. sing. pronoun |
TFSP | 3rd person fem. sing. pronoun |
TPP | 3rd person plural pronoun |
TNSP | 3rd person neuter singular pronouns with no nominal possessive form |
UP | Uninflected pronoun |
DP | Demonstrative pronoun |
QP | Quantitative pronoun |
IDP | Indefinite pronoun |
QPP | Quantitative plural pronoun |
PP | Plural pronoun |
URP | Uninflected relative pronoun |
UIP | Uninflected interrogative pronoun |
RP | Relative pronoun |
TP | Relative pronoun 2 |
IP | Interrogative pronoun |
QCSP | Quantitative pronoun |
The azdict.dll is a 32-bit program written in the C-Language and developed with MS Visual Studio. It has been tested successfully for Visual Basic 4 and 5. When you try to execute using the "RUN" command from the Visual Basic development environment you may get "Runtime error 48, Error in loading DLL" or "Runtime error 49, Bad DLL calling convention". You must generate an EXE file, and test by executing the EXE file (from the Windows RUN command or by double clicking the EXE file that you created). The azdict.dll and the dictionaries must be in the same directory where the EXE file resides. The disadvantage of running from the EXE is that you do not have the VB debugger to help you to trace errors in your code. For debugging, you have to use print statements or code dummy replacements for the azdict functions so that you can use the VB runtime debugger.
' Declarations for the azdict.dll dictionary interface.
'
' The first argument of every function is the first byte of the control
' block array (pass CB(1)); the DLL uses the block as its work area.
'
' A C "int" in the 32-bit DLL is 32 bits wide, which is a Visual Basic Long
' (a Visual Basic Integer is only 16 bits).  Return values and by-value
' counts are therefore declared As Long.
'
' NOTE(review): the ByRef count arguments (i1 of azaid, i2 of azpdg) also
' receive a C int.  They are left As Integer so that existing callers that
' pass Integer variables still compile; confirm, then widen the declaration
' and the callers' variables to Long together.

' azinit initializes dictionary
' p1 = control block, p2 = dictionary path name; returns 0 when successful
Declare Function azinit Lib "azdict" _
    (ByRef p1 As Byte, _
     ByVal p2 As String) As Long

' azlook does dictionary look-up
' p1 = control block, p2 = word, p3 = returned word information (80 bytes)
' returns the number of paradigms, 0 if not found, 255 if only word
' information was found
Declare Function azlook Lib "azdict" _
    (ByRef p1 As Byte, _
     ByVal p2 As String, _
     ByVal p3 As String) As Long

' azaid gives aid candidates
' p1 = control block, p2 = word, i1 = number of candidates returned,
' p3 = six 42-byte candidate areas; returns 0 when successful
Declare Function azaid Lib "azdict" _
    (ByRef p1 As Byte, _
     ByVal p2 As String, _
     ByRef i1 As Integer, _
     ByVal p3 As String) As Long

' azpdg gives paradigm information
' p1 = control block, i1 = paradigm number, i2 = number of forms returned,
' p2 = type code (6 bytes), p3 = 9*60 area for the forms
Declare Function azpdg Lib "azdict" _
    (ByRef p1 As Byte, _
     ByVal i1 As Long, _
     ByRef i2 As Integer, _
     ByVal p2 As String, _
     ByVal p3 As String) As Long

' azpdg2 returns paradigm information structure
' p1 = control block, p2 = first byte of the area that receives the
' wordinfo.h structure; azlook must be called first
Declare Function azpdg2 Lib "azdict" _
    (ByRef p1 As Byte, _
     ByRef p2 As Byte) As Long

' azterm closes dictionary and frees dynamic areas
' p1 = control block; returns 0 when successful
Declare Function azterm Lib "azdict" _
    (ByRef p1 As Byte) As Long
' ---- Work areas shared by the dictionary calls ----

' Control block handed to every azdict function as CB(1)
Dim CB(1200) As Byte

' Dictionary lookup (azlook)
Dim wrd As String               ' word to look up
Dim wrdinfo As String * 80      ' word information returned by azlook
Dim nopdg As Integer            ' number of paradigms returned by azlook

' Spelling aid (azaid)
Dim aidlist As String * 252     ' 6*42: six 42-byte candidate areas
Dim aidct As Integer            ' number of candidates returned

' Single paradigm expansion (azpdg)
Dim pdgmno As Integer           ' paradigm number
Dim noofent As Integer          ' number of entries
Dim typcod As String * 6        ' paradigm type code
Dim entries As String * 540     ' 9*60: nine 60-byte word forms

' Full paradigm structure (azpdg2)
Dim wd1(5800) As Byte           ' size of wordinfo.h
' Initialization
' Locates usuk.dic next to the executable and opens it with azinit.
' Processing stops with a message box when the file is missing or when
' azinit reports an error (return code > 0).
' NOTE(review): the fragment is assumed to run inside a Sub (for example
' Form_Load); use Exit Function instead if it is placed in a Function.
dictpath = App.Path & "\usuk.dic"
dictname = dictpath
If Dir(dictname) = "" Then
    MsgBox "Did not find dictionary " & dictpath
    Exit Sub
End If
rc = azinit(CB(1), dictname)
If rc > 0 Then
    MsgBox "Invalid dictionary " & dictpath
    Exit Sub
End If
' Successful initialization
In addition to checking the dictionaries provided, your program may need to check a plain text addendum dictionary that you create or update during spelling aid. The addendum dictionary contains words not found in any of the dictionaries accessed by AZLOOK.
' Dictionary lookup
' azlook returns the number of paradigms found, 255 when only word
' information is available, and 0 when the word is not in the dictionary.
nopdg = azlook(CB(1), wrd, wrdinfo)
Select Case nopdg
    Case 255
        ' Only word information was found
        Label3.Caption = "Information: " & wrdinfo
    Case Is > 0
        Label3.Caption = "Number of paradigms = " & nopdg
        ' Fetch the complete morphology structure and display all paradigms
        rc = azpdg2(CB(1), wd1(1))
        If rc = 0 Then
            Call DecodeWordInfo
        End If
    Case Else
        ' The word was not found.
        ' Lookup word in another dictionary initialized with a different control block.
        ' Lookup in your plain text addendum dictionary.
        ' Do spelling aid if all the lookups failed.
End Select
When using more than one dictionary, use arrays to store the words and the distance values returned by spelling aid. Check every candidate from the second dictionary against the array before adding a word. This eliminates duplicate words that may occur due to overlap in the contents of the dictionaries. Also, using arrays makes it possible to rank the combined list of aid candidates by decreasing similarity using the distance value returned by spelling aid.
A general dictionary may match 98% of the words in typical text. The remaining words may be misspellings, but they are also likely to be infrequent words, abbreviations or proper names not included in the dictionary. You can create an addendum dictionary (a plain text file) where you can save words that are not in the dictionary but that the user of your program considers to be correct. A good spelling aid interface offers the user a list of aid candidates, but it also offers a choice to add words to an addendum dictionary.
' Spelling aid
' Collects the candidates returned by azaid into dist()/aidwd(), sorts
' them so that the closest candidate (smallest distance) comes first, and
' shows them in List1.  Element 0 of both arrays is the swap area used by
' the sort; the arrays have room for the candidates of two dictionaries.
Dim aidct As Integer
Dim totaid As Integer
Dim dist(12) As Long            ' Long: a 2-byte distance can exceed 32767
Dim aidwd(12) As String
Dim entryPos As Long
Dim nulPos As Long
Dim i1 As Long

aidct = 0
totaid = 0
If sw1 = 1 Then ' First dictionary
    aidlist = String(252, 32) ' set aidlist to blanks
    rc = azaid(CB(1), wrd, aidct, aidlist)
    ' azaid returns 0 for normal execution; ignore aidct otherwise
    If rc = 0 And aidct > 0 Then
        For i = 1 To aidct
            entryPos = (i - 1) * 42 ' start of the 42-byte candidate area
            ' Bytes 1-2: distance value, most significant byte first
            i1 = Asc(Mid(aidlist, entryPos + 1, 1))
            i1 = i1 * 256 + Asc(Mid(aidlist, entryPos + 2, 1))
            ' Bytes 3-42: candidate word, cut at the null terminator
            wrk = Mid(aidlist, entryPos + 3, 40)
            nulPos = InStr(wrk, Chr(0))
            If nulPos > 0 Then
                wd = Left$(wrk, nulPos - 1)
            Else
                wd = wrk
            End If
            totaid = totaid + 1
            dist(totaid) = i1 ' add distance to array
            aidwd(totaid) = wd ' add spelling candidate to array
        Next i
    End If
End If
' At this point AZAID may be invoked for another dictionary

If totaid = 0 Then
    List1.AddItem "No similar words"
Else
    ' Sort entries by increasing distance (closest candidate first)
    ' this is only needed when two dictionaries are used
    For i = 1 To totaid - 1
        For j = i + 1 To totaid
            If dist(i) > dist(j) Then
                dist(0) = dist(j)
                aidwd(0) = aidwd(j)
                dist(j) = dist(i)
                aidwd(j) = aidwd(i)
                dist(i) = dist(0)
                aidwd(i) = aidwd(0)
            End If
        Next j
    Next i
    ' Display aid candidates
    For i = 1 To totaid
        List1.AddItem aidwd(i)
    Next i
End If
' Terminate dictionary processing, free dynamic areas.
' azterm returns 0 for normal execution, >0 otherwise; CB(1) is the start
' of the control block that was passed to azinit.
rc = azterm(CB(1))
' This subroutine decodes the morphology data structure in WORDINFO.H.
' It assumes that wd1() is a Byte array holding the structure data,
' starting at wd1(1), and writes the decoded text to Text2.
'
' Layout walked by the code (all sizes in bytes):
'   word (40), paradigm count (1), then for each paradigm:
'   type code (6), form count (1), and 9 word-form slots of 70 bytes each:
'   entry (60), attribute count (1), 3 attribute masks of 3 bytes.
' Unused form and mask slots are skipped so the offset stays aligned.
'
' The report is assembled in a local string and assigned to Text2 once,
' instead of re-reading and re-writing Text2.Text for every fragment.
Sub DecodeWordInfo()
    Const WORD_LEN = 40
    Const TYPCOD_LEN = 6
    Const MAX_FORMS = 9
    Const ENTRY_LEN = 60
    Const MAX_ATTRS = 3
    Const MASK_LEN = 3
    Const FORM_SIZE = 70 ' ENTRY_LEN + 1 + MAX_ATTRS * MASK_LEN

    Dim report As String
    Dim offs As Long ' current position in wd1()
    Dim pdgmct As Long
    Dim formct As Long
    Dim attrct As Long
    Dim ix1 As Long
    Dim ix2 As Long
    Dim ix3 As Long
    Dim k As Long
    Dim typecode As String
    Dim maskval As Long

    Text2.Visible = False ' suppress flashing during update
    Text2.Font.Bold = False

    report = "Word: " & BytesToText(1, WORD_LEN) & vbCrLf
    offs = WORD_LEN + 1
    pdgmct = wd1(offs)
    offs = offs + 1

    For ix1 = 1 To pdgmct ' for each paradigm
        typecode = BytesToText(offs, TYPCOD_LEN)
        offs = offs + TYPCOD_LEN
        ' The last character of the type code is the part of speech
        report = report & vbCrLf & "===== "
        report = report & PartOfSpeechName(Right$(typecode, 1))
        report = report & " (" & typecode & ")" & " =====" & vbCrLf

        formct = wd1(offs)
        offs = offs + 1
        For ix2 = 1 To MAX_FORMS ' for each word form
            If ix2 <= formct Then
                report = report & BytesToText(offs, ENTRY_LEN) & vbCrLf
                offs = offs + ENTRY_LEN
                attrct = wd1(offs)
                offs = offs + 1
                For ix3 = 1 To MAX_ATTRS ' for every attribute entry
                    If ix3 <= attrct Then
                        ' 3-byte attribute mask, most significant byte first
                        maskval = 0
                        For k = 1 To MASK_LEN
                            maskval = maskval * 256 + wd1(offs)
                            offs = offs + 1
                        Next k
                        If maskval > 0 Then
                            report = report & DescribeAttrMask(maskval) & vbCrLf
                        End If
                    Else
                        offs = offs + MASK_LEN ' 3 bytes for each mask entry
                    End If
                Next ix3
            Else
                offs = offs + FORM_SIZE ' 70 bytes for each form entry
            End If
        Next ix2 ' next word form
    Next ix1 ' next paradigm

    Text2.Text = report
    Text2.Visible = True
End Sub

' Returns the null-terminated text stored in wd1(start) .. wd1(start + maxLen - 1).
' Reading stops at the first zero byte so that bytes after the terminator
' are never shown.
Private Function BytesToText(ByVal start As Long, ByVal maxLen As Long) As String
    Dim k As Long
    Dim txt As String

    txt = ""
    For k = start To start + maxLen - 1
        If wd1(k) = 0 Then Exit For
        txt = txt & Chr(wd1(k))
    Next k
    BytesToText = txt
End Function

' Maps the part-of-speech letter of a type code to its display name.
' Letters without a name are returned unchanged.
Private Function PartOfSpeechName(ByVal pos As String) As String
    Select Case pos
        Case "V": PartOfSpeechName = "Verb"
        Case "N": PartOfSpeechName = "Noun"
        Case "I": PartOfSpeechName = "Interjection"
        Case "R": PartOfSpeechName = "Preposition"
        Case "C": PartOfSpeechName = "Conjunction"
        Case "J": PartOfSpeechName = "Adjective"
        Case "A": PartOfSpeechName = "Adverb"
        Case "P": PartOfSpeechName = "Pronoun"
        Case "T": PartOfSpeechName = "Article"
        Case Else: PartOfSpeechName = pos
    End Select
End Function

' Builds the attribute text for one 24-bit grammatical attribute mask.
' The bit constants carry the "&" suffix so that they are Long values:
' a plain &H8000 is the 16-bit Integer -32768, which sign-extends to
' &HFFFF8000 and would also match any bit set in the top mask byte.
Private Function DescribeAttrMask(ByVal maskval As Long) As String
    Dim txt As String

    txt = " "
    If (maskval And &H8000&) <> 0 Then txt = txt & "Infinitive "
    If (maskval And &H4000&) <> 0 Then txt = txt & "Present "
    If (maskval And &H2000&) <> 0 Then txt = txt & "Past "
    If (maskval And &H1000&) <> 0 Then txt = txt & "Present Participle "
    If (maskval And &H800&) <> 0 Then txt = txt & "Past Participle "

    ' Person bits: combined wording when all three, or first and second, are set
    Select Case (maskval And &H700&)
        Case &H700&
            txt = txt & "1st,2nd,3rd_Person "
        Case &H600&
            txt = txt & "1st,2nd_Person "
        Case Else
            If (maskval And &H400&) <> 0 Then txt = txt & "1st_Person "
            If (maskval And &H200&) <> 0 Then txt = txt & "2nd_Person "
            If (maskval And &H100&) <> 0 Then txt = txt & "3rd_Person "
    End Select

    If (maskval And &H80&) <> 0 Then txt = txt & "Singular "
    If (maskval And &H40&) <> 0 Then txt = txt & "Plural "
    If (maskval And &H20&) <> 0 Then txt = txt & "Positive "
    If (maskval And &H10&) <> 0 Then txt = txt & "Comparative "
    If (maskval And &H8&) <> 0 Then txt = txt & "Superlative "
    If (maskval And &H4&) <> 0 Then txt = txt & "Possessive "
    If (maskval And &H2&) <> 0 Then txt = txt & "Masculine "
    If (maskval And &H1&) <> 0 Then txt = txt & "Feminine "

    DescribeAttrMask = txt
End Function
/* C-Language Driver for Spelling Aid with two dictionaries */ /* 06/19/2002 - A. Zamora */ #include <string.h> #include <stdio.h> #include "wordinfo.h" /* prototypes for dictionary interface */ extern int azinit(char * cb, char * dicname); extern int azlook(char * cb, char * word, char * wordinfo); extern int azaid(char * cb, char * word, int *aidct, char * aidlst); extern int azpdg2(char * cb, char * pdgdta); extern int azterm(char * cb); int main (int argc, char * argv[]) { int i, j, rci; char moword[40]; /* word for which morphology is needed */ char wdinfo[40]; char *p1, *p2; /* Variables for dictvb (usuk.dic & medical.dic) */ char CB1[1200]; /* usuk.dic dictionary control block */ char CB2[1200]; /* medical.dic dictionary control block */ int look1rc, look2rc; int aid1rc, aid2rc; #define ADLEN 42 int aidct1; char aidlst1[6][ADLEN]; /* 2-byte distance, 40-byte aid candidate */ int aidct2; char aidlst2[6][ADLEN]; /* 2-byte distance, 40-byte aid candidate */ char usukpath[64]; char medpath[64]; int totaid; char mergedaid[12][ADLEN]; /* Merged aid list for 2 dictionaries */ strcpy(usukpath,"usuk.dic"); strcpy(medpath,"medical.dic"); /* * * * * * * * * * * * * * */ /* initialize control block */ /* * * * * * * * * * * * * * */ rci = azinit(CB1, usukpath); if (rci > 0) { printf("%s Dictionary could not be initialized.\n",usukpath); return(rci); } rci = azinit(CB2, medpath); if (rci > 0) { printf("%s Dictionary could not be initialized.\n",medpath); rci = azterm(CB1); /* terminate dictionary 1 */ return(rci); } /* start interactive loop */ printf("\nInput a word, or 0 <ENTER> to quit.\n\n"); rci = scanf("%s", &moword); while (rci == 1){ if (moword[0] == '0' || moword[0] == '\0') break; /* Do a lookup in 1st dictionary (USUK) */ look1rc = azlook(CB1, moword, wdinfo); /* azlook returns the number of paradigms found, 0 otherwise */ /* 255 is returned if only word information is found */ if (look1rc == 255) { printf("==> %s\n", wdinfo); } else { /* look1rc != 255 */ if 
(look1rc == 0) { /* Word was not found in USUK dictionary */ /* Do a lookup in 2nd dictionary (MEDICAL) */ look2rc = azlook(CB2, moword, wdinfo); if (look2rc == 255) { printf("==> %s\n", wdinfo); } else { /* look2rc != 255 */ if (look2rc == 0) { /* Word was not found in Medical dictionary either. */ printf("Lookup - no match found.\n"); /* Get aid candidates from USUK dictionary */ p1 = &(aidlst1[0][0]); aid1rc = azaid(CB1, moword, &aidct1, p1); /* azaid returns 0 for normal execution, >0 otherwise */ if (aid1rc == 0) { totaid = 0; /* copy USUK aid candidates to merged aid list */ for (i=0; i<aidct1; i++) { /* note: memcpy is used because distance may have nulls */ memcpy(mergedaid[totaid++],aidlst1[i],ADLEN); } } /* aid1rc == 0 */ /* Get aid candidates from Medical dictionary */ p1 = &(aidlst2[0][0]); aid2rc = azaid(CB2, moword, &aidct2, p1); /* azaid returns 0 for normal execution, >0 otherwise */ if (aid2rc == 0) { /* Process medical dictionary aid candidates */ for (i=0; i<aidct2; i++) { p1 = aidlst2[i]; /* compare against candidates from USUK dictionary */ for (j=0; j<aidct1; j++) { p2 = aidlst1[j]; if (strcmp(p1+2,p2+2) == 0) goto duplicate; /* avoid duplicates */ } /* Add candidate to merged aid list */ memcpy(mergedaid[totaid++],aidlst2[i],ADLEN); duplicate: ; } } /* aid2rc == 0 */ if (totaid > 0) { /* Sort merged aid list in increasing distance */ /* Use aidlist1[0] for temporary storage */ for (i=0; i<totaid-1; i++) { p1 = mergedaid[i]; for (j=i+1; j<totaid; j++) { p2 = mergedaid[j]; if (strncmp(p1,p2,2) > 0) { /* exchange entries */ memcpy(aidlst1[0],p1,ADLEN); memcpy(p1,p2,ADLEN); memcpy(p2,aidlst1[0],ADLEN); } } } /* Display aid candidates */ printf("%d Aid Candidates:\n", totaid); for (i=0; i<totaid; i++) { p1 = mergedaid[i]; printf(" %s\n", p1+2); } } else { printf("No aid candidates.\n"); } } /* look2rc == 0 */ else { /* look2rc > 0*/ printf("Word was found in MEDICAL.DIC\n"); } /* look2rc > 0*/ } /* look2rc != 255 */ } /* look1rc == 0 */ else { /* 
look1rc > 0 */ /* the word was found */ printf("Word was found in USUK.DIC\n"); } /* look1rc > 0 */ } /* look1rc != 255 */ printf("\nInput a word, or 0 <ENTER> to quit.\n\n"); rci = scanf("%s", &moword); } /* input loop while (rci == 1) */ /* * * * * * * * * * * * * * * */ /* Terminate dictionary access */ /* * * * * * * * * * * * * * * */ rci = azterm(CB1); rci = azterm(CB2); return (rci); } /* end main */