Direct Threaded Daydreams

COM Scripting in C by way of JavaScript

February 7, 2010 · Leave a Comment

I have recently needed to interact with some common ActiveX / COM ( Component Object Model ) objects via plain old C. A handful of examples can be found on the Internet, but the process of orchestrating the data-structures and function-calls that are necessary can initially be a little daunting.

As I pored over the documentation, I began to realize why most C++ IDEs generate a very nice abstraction layer to represent a COM control in software. I was going to have to write some very specific code for each type of COM object that I wanted to instantiate.

I had hoped to be able to build a general library that would allow me to access COM components at will from a C program.

The more I dug into the subject, the more I came to realize that I would benefit from some sort of embedded interpreter that could abstract the COM interfaces for me. Many scripting languages provide some sort of simple way to handle COM.

The Microsoft ScriptControl object is itself a COM object that exposes scripting engines to a COM client. I had learned enough about creating a COM client in C that I decided to try to build a library of functions that would evaluate JavaScript expressions via the ScriptControl. I could then use embedded JavaScript to create and interact with COM objects.

Please note that the ScriptControl object may not already be installed on a given Windows machine. If the control isn’t present on your machine, please look for a recent version online.

As part of the library, I needed functions that would convert between ANSI strings and Unicode OLE strings. I found the two functions I needed here:

http://support.microsoft.com/kb/138813

I have included these functions in a separate file ( unicode_conv.c ) in the archive listed at the end of this file, but I do not claim any kind of copyright on them.

My interface to the ScriptControl is in this library:

com_script.c

// Plain C interface to the ScriptControl object
//
// License: MIT / X11
// Copyright (c) 1999. 2009 by James K. Lawless
// jimbo@radiks.net http://www.radiks.net/~jimbo
// http://www.mailsend-online.com
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

#include <windows.h>
#include <stdio.h>
#include "unicode_conv.h"

// Evaluate a JavaScript expression through the Microsoft ScriptControl
// and print the result on the console.
//
// expression : Unicode (OLE) string holding the JavaScript source to
//              evaluate. The caller keeps ownership of this string.
//
// The function creates a ScriptControl instance, sets its Language
// property to "JavaScript", calls Eval() on the expression and prints
// the result if it is empty, null, a 32-bit integer or a string.
// Everything acquired here (the control, the argument strings, the
// result VARIANT and the converted message) is released before
// returning. COM must already have been initialized by the caller.
void javascript(LPOLESTR expression)  {
   HRESULT hr;

   IDispatch *scrDisp = NULL;
   WCHAR *tmpPtr;
   DISPID dispID;
   CLSID scrClsid;

   VARIANT parm;
   VARIANT result;
   char *msg = NULL;

   DISPPARAMS dispParams = { NULL, NULL, 0, 0 };
   DISPID dispidNamed = DISPID_PROPERTYPUT;

      // Start with empty VARIANTs so the cleanup code can always
      // call VariantClear() on them.
   VariantInit(&parm);
   VariantInit(&result);

      // Get CLSID for ScriptControl Application from registry.
   hr = CLSIDFromProgID(L"ScriptControl", &scrClsid);
   if(FAILED(hr)) {
      printf("ScriptControl not found.\n");
      return;
   }
      // Start Scriptcontrol and get its IDispatch pointer.
   hr = CoCreateInstance(&scrClsid, NULL,
      CLSCTX_LOCAL_SERVER|CLSCTX_INPROC_SERVER,
      &IID_IDispatch, (void **)&scrDisp);
   if(FAILED(hr)) {
      printf("Could not create instance of ScriptControl.\n");
      return;
   }

      // Get the 'Language' property's DISPID.
   tmpPtr = L"Language";
   hr = scrDisp->lpVtbl->GetIDsOfNames(scrDisp, &IID_NULL, &tmpPtr, 1,
      LOCALE_USER_DEFAULT, &dispID);
   if(FAILED(hr)) {
      printf("Could not find the 'Language' property. HRESULT=%08lx\n",
         (unsigned long)hr);
      goto cleanup;
   }

   parm.vt = VT_BSTR;
   parm.bstrVal = SysAllocString( OLESTR("JavaScript"));
   if(parm.bstrVal == NULL) {
      printf("Out of memory.\n");
      goto cleanup;
   }

      // A property-put passes the new value as a single argument
      // named DISPID_PROPERTYPUT.
   dispParams.cArgs = 1;
   dispParams.rgvarg = &parm;
   dispParams.cNamedArgs = 1;
   dispParams.rgdispidNamedArgs = &dispidNamed;

   hr = scrDisp->lpVtbl->Invoke(scrDisp,
      dispID, &IID_NULL, LOCALE_SYSTEM_DEFAULT,
      DISPATCH_PROPERTYPUT,
      &dispParams, NULL, NULL, NULL
   );
      // The callee does not take ownership of the argument; free it.
   VariantClear(&parm);
   if(FAILED(hr)) {
      printf("Could not change Language property to 'JavaScript'. HRESULT=%08lx\n",
         (unsigned long)hr);
      goto cleanup;
   }

      // Get the 'Eval' method's DISPID.
   tmpPtr = L"Eval";
   hr = scrDisp->lpVtbl->GetIDsOfNames(scrDisp, &IID_NULL, &tmpPtr, 1,
      LOCALE_USER_DEFAULT, &dispID);
   if(FAILED(hr)) {
      printf("Could not find the 'Eval' method. HRESULT=%08lx\n",
         (unsigned long)hr);
      goto cleanup;
   }

   parm.vt = VT_BSTR;
   parm.bstrVal = SysAllocString( expression);
   if(expression != NULL && parm.bstrVal == NULL) {
      printf("Out of memory.\n");
      goto cleanup;
   }

      // Eval() is an ordinary method call: one positional argument,
      // no named arguments.
   dispParams.cArgs = 1;
   dispParams.rgvarg = &parm;
   dispParams.cNamedArgs = 0;
   dispParams.rgdispidNamedArgs = NULL;

   hr = scrDisp->lpVtbl->Invoke(scrDisp,
      dispID, &IID_NULL, LOCALE_SYSTEM_DEFAULT,
      DISPATCH_METHOD,
      &dispParams, &result, NULL, NULL
   );
   if(FAILED(hr)) {
      printf("Call to Eval() failed. HRESULT=%08lx", (unsigned long)hr);
      goto cleanup;
   }

   switch(result.vt) {
      case VT_EMPTY:
         printf("No return value.");
         break;
      case VT_NULL:
         printf("NULL return value.");
         break;
      case VT_I4: // integer
         printf("Result: %ld\n",(long)result.lVal);
         break;
      case VT_BSTR:
         UnicodeToAnsi(result.bstrVal,&msg);
            // A NULL BSTR (empty string) may convert to a NULL pointer.
         printf("Result: %s\n",(msg != NULL) ? msg : "");
            // NOTE(review): assumes UnicodeToAnsi() allocates with
            // CoTaskMemAlloc(), mirroring how the test programs free
            // the output of AnsiToUnicode() -- confirm in unicode_conv.c
         CoTaskMemFree(msg);
         msg = NULL;
         break;
      default:
         printf("Unhandled VARIANT type %d in result.\n",result.vt);
   }

cleanup:
      // VariantClear() frees any BSTR still held and is harmless on an
      // empty VARIANT.
   VariantClear(&result);
   VariantClear(&parm);
   if(scrDisp != NULL) {
      scrDisp->lpVtbl->Release(scrDisp);
   }
}

The first thing the function does is to look up the CLSID for the ScriptControl object. After attaining the CLSID, the function instantiates an instance of the control. ( Please note that I have not provided a way to release this object, yet. This code is a work-in-progress. ).

Once the ScriptControl object is instantiated, the function sets the Language property to “JavaScript”. It then takes the Unicode string passed in as a parameter and invokes the object’s Eval() method.

If the type of the resultant VARIANT object is in the set of handled types, a message will be displayed on the console containing the return-value.

To test the library, I first tried to use the speech API:

speech_test.c

// Invoke Speech API via JavaScript via ScriptControl
//
// License: MIT / X11
// Copyright (c) 1999. 2009 by James K. Lawless
// jimbo@radiks.net http://www.radiks.net/~jimbo
// http://www.mailsend-online.com
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

#include <windows.h>
#include <stdio.h>
#include "com_script.h"
#include "unicode_conv.h"

void javascript(LPOLESTR );

void main(void) {
   LPOLESTR oleString;

   OleInitialize(NULL);
   AnsiToUnicode(
      "var voice=new ActiveXObject('SAPI.SpVoice');"
      "function speak(s) {"
      "   voice.Speak(s,1);"
      "   voice.WaitUntilDone(-1);"
      "}"
      "speak('I like peanut butter.');",
     &oleString);

   javascript(oleString);

   CoTaskMemFree(oleString);

   OleUninitialize();
}

To compile the source enter the line:

com_comp.bat speech_test.c

The source for com_comp.bat is as follows:

cl %1 com_script.c unicode_conv.c /link ole32.lib user32.lib oleaut32.lib uuid.lib

When you run the program, you should hear your computer speak the words “I like peanut butter”, if you have the speech API installed.

A more generic testbed is as follows ( note that I never release the ScriptControl object in this example … that should be added before placing any code into a production environment that uses this technique. )

js_test.c

// Test JavaScript COM scripting
//
// License: MIT / X11
// Copyright (c) 1999. 2009 by James K. Lawless
// jimbo@radiks.net http://www.radiks.net/~jimbo
// http://www.mailsend-online.com
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

#include <windows.h>
#include <stdio.h>
#include "com_script.h"
#include "unicode_conv.h"

void javascript(LPOLESTR );

void main(void) {
   LPOLESTR oleString;
   char buff[2048];

   OleInitialize(NULL);
   printf("Enter a JavaScript expression:\n");
   while(fgets(buff,2047,stdin)!=NULL) {
      AnsiToUnicode(buff,&oleString);
      javascript(oleString);
      CoTaskMemFree(oleString);
      printf("\nEnter a JavaScript expression:\n");
   }
   OleUninitialize();
}

Compile the above using the command:

com_comp.bat js_test.c

Then, run js_test.exe. You will be prompted to enter a JavaScript expression. The program will Eval() the expression and will display the result.

If you enter the expression:

2+2

…you should see the result 4.

If you enter the expression:

parseInt(Math.random()*444)

…you’ll hopefully see an integer in the range of 0 to 443 inclusive.

Note that you cannot call methods like alert() or WScript.Echo() as they are exposed to the ScriptControl by their respective container programs.

I now have what I consider to be the precursor to an appropriate embeddable ScriptControl library.

I have yet to do the following:

  • Separate the ScriptControl instantiation from the Eval() call
  • Provide a function to release the instantiated ScriptControl object
  • Add support for the Clear() and Reset() method calls
  • Add support for the AddObject() method call to allow the JavaScript library to be able to call functions exposed in the C code
  • Add a better way to more generally handle the VARIANT result
  • Add support for VBScript as an alternative
  • Bottle the whole thing up in a DLL

All source code, executable files, and compile batch file can be downloaded from a single archive at:
http://www.mailsend-online.com/wp/comscript.zip

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks


Unless otherwise noted, all code and text entries are Copyright © 2010 by James K. Lawless

→ Leave a CommentCategories: C · Javascript
Tagged: , , , , ,

Open Source Licenses

January 27, 2010 · Leave a Comment

I’d like to take a moment and describe why I place a license-statement in most of the source files you find here.

My goals are pretty simple for most of the source that I will release:

  • I want you to be able to use my source-code whether you’re a business or whether you want to use it for personal computing.
  • I want to disclaim liability for your use of any of the code.
  • I want to retain my own rights to use the same code as I see fit.

Some have asked why I don’t just omit the license or they’ve asked why I don’t put the code into the public domain.

I’ve read a few articles on this subject and have found that the term public domain has different meanings in different countries. In fact, in some countries, you cannot place your code into the public domain just by stating it in the source. My omission of a license statement would then imply that I hold exclusive rights to the source in some countries.

Please refer to the commentary at Stack Overflow:

http://stackoverflow.com/questions/219742/open-source-why-not-release-into-public-domain

I use the MIT / X11 license for most of the source here. It’s a simple license that does not require you to release your own source under any conditions. I believe it to be simple and clear. I think it gets the point across that you can use the source that contains the MIT / X11 license as you see fit.

I apologize for the couple of dozen lines of comments that prefix most of the source files, but I believe them to be necessary so that everyone who wants to use the source can do so.

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks


Unless otherwise noted, all code and text entries are Copyright © 2010 by James K. Lawless

→ Leave a CommentCategories: Tale Recursion
Tagged:

Charging by the Byte

January 26, 2010 · Leave a Comment

In my college days in the mid-80’s, my computers of choice were the Commodore 64 and Commodore 128. During this time, I was an active member of the local C-64 Bulletin-Board System (BBS) community.

One evening, I received a private e-mail asking if I was up for some paid consulting work for the C-64. I didn’t know the gentleman who asked, but he was a friend-of-a-friend, so I agreed to take a look at the problem. I’m afraid I can’t remember the requestor’s name, so I’ll just refer to him as “Mr. X”.

Mr. X was a pharmacist who had built some sort of system in C-64 BASIC that helped him at his job. He had built a small database of sorts using RELative files to hold the data. To speed up the code a bit, he used a machine-language BASIC extension library from an Abacus book. This enhanced BASIC library intercepted the routines that process BASIC, wedged in some new commands, and made some alterations to some existing commands.

His BASIC code ran better with the extensions that dealt with RELative files as they provided a verb to read in data by record-number directly into a string. The syntax was something like:

READ@ s$, file, rec

Where file was the open file-number and rec was the desired record-number.

To enhance his program even further, he tried to compile the code using Blitz! from Skyles Electric Works.

Blitz! worked just fine at speeding up most of his code, even most of the extensions from the Abacus library. However, it refused to compile any line that contained the READ@ command.

Mr. X offered me the princely sum of $20 to fix things so that he could Blitz! his program with the BASIC wedge. I agreed. The $20 would have been nice alone, but the challenge was something I couldn’t resist.

He transferred the files to me using the ubiquitous Punter protocol. I immediately went to work. I used a different BASIC compiler and received the same error that Blitz! had yielded for him.

C-64 BASIC compilers like Blitz! tried to allow for extensions to the BASIC language. If these compilers encountered BASIC tokens that they didn’t recognize as being syntactically correct, they often just passed the text directly into the regular interpreter. This technique worked for most situations. However, the BASIC language already had a token for a verb called READ.

The READ verb is used in BASIC to copy data from DATA statements into variables. Both of the compilers we were using had assumed that we were trying to use the normal READ verb, not realizing that the Abacus wedge was doing a little bit of looking ahead in the input stream to see if the token for the @ symbol followed the READ token.

As far as the compilers were concerned, the line of code with READ@ was in error as the @ symbol had no meaning when following READ in regular BASIC.

My solution?

Well, I didn’t have the Abacus book, but I had the ML code in a traditional READ/POKE loop in the BASIC code. I disassembled the ML code with an ML monitor ( Bill Yee’s Micromon was my favorite ) and began to see if I could determine where the code was checking for the READ token. I found the two tokens READ and @ next to each other in a little table.

I changed the positions of each token in the DATA statement and changed the code to observe the new format for the enhanced READ verb so that the command had to be issued in this manner:

@READ s$, file, rec

Note the @ symbol appearing first. Since the @ symbol was not a valid way to begin any BASIC statement, my BASIC compiler passed this text directly on to the BASIC interpreter which was wedged with the Abacus extension library. Everything ran correctly.

I e-mailed Mr. X and described how to swap the two ML numbers in his code and also informed him that he’d need to change the READ@ to @READ. I was fairly certain that this would also work with Blitz!

I got an e-mail back indicating that the changes allowed him to compile his code. He brought over the $20 while I wasn’t home, so I never got to meet him face-to-face.

As I was thinking about the $20 I thought about the items that I had to change. Really, the heart of the change was just swapping two bytes in the ML library. $10 per byte! ( Yeah, I know I had to change the BASIC code, too, but I didn’t count that. The ML code was really the significant piece that needed to be fixed. )

I sometimes look at compiled code I’ve written in my professional career and marvel at how financially-well off I’d be if I could be paid $10 per byte for each EXE file. ;-)

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks


Unless otherwise noted, all code and text entries are Copyright © 2010 by James K. Lawless

→ Leave a CommentCategories: Commodore 64 · Tale Recursion
Tagged: , , , ,

An SMTP Server Simulator in Perl

January 8, 2010 · Leave a Comment

In the late 90’s, I wrote a commercial command-line SMTP e-mail utility called MailSend. In order to test the behavior of certain operations, I needed a program that would simulate an SMTP server. This simulator needed to allow me to quickly alter behaviors to generate conditions that customers might be seeing with their SMTP servers.

At the time, I wrote the tool in my utility language of choice: Perl.

smtpsim.pl

# An SMTP server simulator in Perl
#
# License: MIT / X11
# Copyright (c) 2009 by James K. Lawless
# jimbo@radiks.net http://www.radiks.net/~jimbo
# http://www.mailsend-online.com
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

use Socket;

# TCP port to listen on.
$port = 25;

# pack()/unpack() template used for socket addresses in this script:
# a native short, a network-order short, a 4-byte string and 8 NUL bytes.
$sockaddr = 'S n a4 x8';
$authcount=0;

($name, $aliases, $proto) = getprotobyname('tcp');
print "Port = $port\n";

$thisaddr = gethostbyname('localhost');
defined($thisaddr) || die "gethostbyname: could not resolve localhost";

$this = pack($sockaddr, AF_INET, $port, $thisaddr);

socket(S, AF_INET, SOCK_STREAM, $proto) || die "socket: $!";

bind(S,$this) || die "bind: $!";
listen(S,5) || die "listen: $!";

select(S); $| = 1; select(STDOUT);

# Serve one client at a time, forever.
for(;;) {
   print "Listening for connection....\n";

   ($addr = accept(NS,S)) || die "accept: $!";
   # Greet the client as soon as it connects.
   send(NS,"200 okey-dokey\r\n",0);
   # Use a separate variable for the peer's port so the listening
   # port in $port is not overwritten.
   ($af,$peerport,$inetaddr) = unpack($sockaddr,$addr);
   @inetaddr = unpack('C4',$inetaddr);
   print "$af $peerport @inetaddr\n";

   $ctr=0;
   while ($t=<NS>) {
      $ctr++;
      # QUIT ends the conversation without a reply.
      if( substr($t,0,4) eq "QUIT") {
         last;
      }
      # Echo every line received to the console.
      print $t;
      # A line starting with '.' is acknowledged as the end of the
      # message body.
      if( substr($t,0,1) eq '.') {
         send(NS,"200 okey-dokey\r\n",0);
         next;
      }
      # Acknowledge the recognized commands; alter these tests to
      # simulate other server behavior. ("QUIT" cannot match here
      # because the loop has already exited for it above.)
      $x=substr($t,0,4);
      if( ($x eq "EHLO") ||($x eq "HELO") || ($x eq "RCPT") || ($x eq "MAIL") ||
         ($x eq "RSET") || ($x eq "DATA") || ($x eq "QUIT")) {
            # Strip the line terminator from the echoed command so
            # the reply is a single line.
            ($cmd = $t) =~ s/[\r\n]+$//;
            send(NS,"200 "  . $cmd .  " okey-dokey\r\n",0);
      }
   }
   close(NS);
}

Running the above simulator yields the following output:

Port = 25
Listening for connection....

I then use my MailSend utility to send an email using localhost as the SMTP server address:

C:\j\backup\MAILSEND\755>mailsend -smtp localhost -from jimbo@radiks.net -to jimbo@radiks.net -msg test

The smtpsim.pl console then displays the following conversation:

Port = 25
Listening for connection....
2 2279 127 0 0 1
HELO localhost
MAIL FROM: <jimbo@radiks.net>
RCPT TO: <jimbo@radiks.net>
DATA
To: jimbo@radiks.net
From: jimbo@radiks.net
Mime-Version: 1.0
X-Mail-Agent: MailSend v7.55

test
.
Listening for connection....

The console window for MailSend displays the following:


MailSend v7.55 (Registered version)
 attempting connection to localhost
Connection successful.
*** 200 HELO localhost okey-dokey
*** 200 MAIL FROM: <jimbo@radiks.net> okey-dokey
*** 200 RCPT TO: <jimbo@radiks.net> okey-dokey
*** 200 DATA okey-dokey
*** 200 okey-dokey

Send complete!

If I need to try and introduce specific behavior for a given SMTP command, I alter the if statements in the while loop to send back something other than a 200 status code.

The simulator can be used to test any SMTP client.

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks


Unless otherwise noted, all code and text entries are Copyright © 2010 by James K. Lawless

→ Leave a CommentCategories: By Language · Internet Protocols · Perl · SMTP
Tagged: , ,

Along Came AWK

January 7, 2010 · 3 Comments

Early in my career as a software developer in the late 1980’s, I wrote code primarily in C for MS-DOS machines. I had been interested in Unix but had no access to a machine with that operating system. Nonetheless, I read a lot of Unix books and articles. I had stumbled across some information on a programming language called AWK that piqued my interest.

I soon found a tiny AWK implementation called BAWK ( Bob Brodt’s AWK ). It lacked associative-arrays and some other AWK niceties, but it introduced me to the simplicity that AWK affords for writing filter programs.

I later found an AWK interpreter that I simply remember as AWK210 because of the archive name. This version of AWK ported to MS-DOS had support for these new-to-me constructs called associative-arrays.

I bought the book The AWK Programming Language and was hooked.

The concept of associative-arrays led me to approach things differently in my AWK code. Differently than I might have written something in C, that is. I envisioned associative-arrays as small in-memory, single-key databases. Of course, at the time, I didn’t use real databases in my code. I was still using b+tree-indexed data files, but I think you understand where I’m going.

I was able to write scripts that maintained in-memory associations that proved to shorten a lot of my code. I was familiar with the concept of a symbol-table as that concept was introduced early in the book Compilers: Principles, Techniques, and Tools. The book contained a sample C function that used a linear-feedback hash to map string keys to string values. AWK just made all of that seem very simple. I didn’t really know what algorithm it was using to map the keys to values and I didn’t care.

I wrote numerous short utilities in AWK at work. At home, I’d written a couple of utilities that leveraged the MS-DOS DEBUG.COM utility. The first was a simple assembler that provided the ability to use labels in scripts that would be piped through DEBUG to generate an executable .COM file. What better use for an associative-array than as a symbol-table for an assembler?

The second utility would generate an input file for DEBUG that would read in raw sectors from floppies using the L(oad) command and would write chunks of them out as binary data files. At the end of the process, all of these files would be collected into a single archive. I wrote a counterpart script to extract these binaries, read them in to DEBUG and write them out as sectors to the specified floppy. These two sets of scripts allowed me to make sector-for-sector floppy-disk images without much hassle.

The desktop calculator example in The AWK Programming Language was so simple, that it helped me to understand the mechanics of recursive-descent parsing. I was reading Jack Crenshaw’s Let’s Build a Compiler series in the pages of Computer Language magazine and was having trouble grasping this concept when trying to read it in the presented C code.

I later wrote a similar parser in C for a desktop calculator using those techniques. Later yet, I applied those concepts to the development of a report-generation language that unfortunately didn’t end up seeing the light of day.

I often used ( and still use ) AWK to create test data. In the old MS-DOS days, I would generate data in flat-files that my programs could read. More recently, I taught a couple of people at work how to extract data that they’d built in Excel and ( by way of an AWK script ) generate SQL code to load the data into a database table. This allowed others to tweak the data in a static form in the spreadsheet. We could then load the data by means of the AWK script.

I have also used AWK to generate code in C and Java based on input data.

I have used AWK to analyze source-code as well. I had written a series of scripts that ultimately parsed some C code written by a team that I was leading. The scripts looked for conformity to the coding standards that had been set in place. I would run the reports each night after everyone had committed their updates. In those days, that meant that they copied their most stable source files into a common directory tree. The scripts would produce a report indicating source files that appeared to be violating the standards citing the suspicious code. That script took a while to run, but I would kick it off in the evening and would forget about it until morning.

In the mid-90’s, I happened upon the Thompson Automation AWK compiler (TAWK) for MS-DOS. I reviewed it for an electronic magazine at the time. As part of my agreement with the publisher, I was able to keep the copy of TAWK for DOS, but I was not able to use it for commercial purposes.

The TAWK for DOS compiler was just great. It created stand-alone EXE’s. It had its own virtual-memory system that would intelligently use Extended Memory, Expanded Memory, and disk-files as necessary. This meant that I could write DOS programs that handled huge amounts of data in memory and would not be bound by the 640K ( really 1Meg ) memory barrier.

A little later, I had been in touch with Pat Thompson about trying out the version of TAWK I’d heard about for MS Windows 95. I received a review copy of the compiler. At the time, I had begun to write an AWK book for a publisher and wondered if I could write a book specifically on TAWK.

The publisher was a little stand-offish about writing for such a specific product and Thompson Automation was a little concerned about piracy. Their manual was their only copy-protection. I understood completely. The compiler was absolutely wonderful.

TAWK supported interfaces to Windows API calls and callbacks. It featured functions that allowed me to allocate and manipulate more C-ish binary data-structures. It also featured easy access to TCP/IP routines.

Instead of writing a book about TAWK, I wrote a review of the compiler for Dr. Dobbs Journal of Software Tools.

You can read the full copy of the review, Examining the TAWK Compiler – Dr. Dobbs Journal, May 1997, here:

http://www.ddj.com/dept/architect/184410193

I liked the compiler so much, that I wrote two of my early commercial Internet e-mail programs with it. I wrote MailSend first ( as mentioned here: http://jimlawless.wordpress.com/2009/11/04/my-big-shareware-splash/ ). Some of my customers were asking for a counterpart utility to read their POP3 mail, so I wrote MailGrab in TAWK as well.

Neither of those programs use AWK’s input capabilities … all processing is handled in the BEGIN pattern.

Many may find it odd that I wrote the utilities in an AWK dialect, but I was quite productive using TAWK. I didn’t have to worry about pointers ( for the most-part … I did have to pack and unpack some data-structures to provide some of the features. ) I didn’t have to worry about a lot of stringently-defined data-structures; I was able to pass associative-arrays around where I needed to keep data. Some of the run-time support routines issued error messages with the original source file line-numbers in them when I’d introduced a bug in trying to read a file. I was able to debug my software quickly.

The compiler and source code fit neatly onto a single 1.44Meg floppy disk. In these days of multi-gigabyte USB flash-ROM drives, that may not seem like much, but I was rather happy to be able to carry the entire development environment for both products and their source around with me so that I could alter and compile them on any 32-bit Windows computer.

I have been selling those products now for over 10 years. Their stability and ability to run on continually evolving versions of Microsoft Windows is amazing.

Thompson Automation ceased doing business several years ago, so the TAWK compiler has not been updated nor is it sold. I am saddened each time I think about the company because TAWK is one of the most solid compilers I have ever used.

These days, I use the GNU AWK interpreter frequently. I use it at work to interrogate data and source files. On several occasions, we’ve been updating some code or needed to upgrade to a new JEE container or something and needed to ensure that we had removed all code that might cause issues. A few short AWK scripts always make that whole process quite painless.

When some friends were having trouble finding work, I wrote an AWK script that processes several files downloaded from a job-posting web-site into a single HTML file with the newest links at the top with all duplicate job-postings removed.

In the coming months, I hope to provide examples of each type of AWK script I’ve mentioned here as I think these kinds of tools can be very handy.

Posts I’ve written on this blog that contain AWK scripts are:

http://jimlawless.wordpress.com/2009/12/27/preserving-my-favorite-hn-links/

I use the above script to generate an HTML page from a text file so that my favorite links from the HackerNews site are updated and available to the public.

http://jimlawless.wordpress.com/2009/10/25/shrouding-csharp-and-java-source-code-with-awk/

The above is a source-code obfuscation utility for C# and Java using AWK. It requires a little coding discipline as a specific naming-convention has to be observed. It’s a prototype at best, but it’s the second such AWK program I’ve written to obfuscate the source for these two languages.

http://jimlawless.wordpress.com/2009/04/30/rss-feed-processing-with-awk/

When I began to write for this blog on wordpress.com, I wanted a way to turn the RSS syndication feed into an HTML file so that I could preserve a simple set of links to all of my posts. I use a variant of the script in the post above to translate the RSS XML into this HTML file: http://www.mailsend-online.com/bloglist.htm

I have another script I use for this site that generates the social media bookmark tags you’ll find at the end of each post. Right now, I’d prefer to keep that script to myself.

You will always be able to see all of the recent AWK-related material I write about by using the URL http://jimlawless.wordpress.com/category/by-language/awk-gawk/

I love AWK not only because I can write useful things quickly, but because using it has helped me simplify the way I approach various kinds of problems.

You might check back here every once in a while. I may finally write that AWK book I’ve been thinking about. ;-)

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks


Unless otherwise noted, all code and text entries are Copyright © 2010 by James K. Lawless

→ 3 CommentsCategories: AWK / GAWK · By Language · Tale Recursion
Tagged: , ,

A Simple Parser for a Small Command Line Interface

January 2, 2010 · Leave a Comment

I like to create and tinker with little programming languages. Before I began to write more complex lexical analyzer functions, I used the strtok() function to retrieve tokens for a given miniature language.

Unfortunately, strtok() can be difficult to use once you introduce modest scanning rules.

I needed a function that would allow me to retrieve the next token from a line of text. This token was to be separated by spaces unless the token begins with a double quotation-mark. If the token begins with a dquote, I want to retrieve all characters including spaces up to ( but not including ) the next dquote.

If a ‘#’ symbol is encountered, I want the parser to return NULL. I will treat this symbol as a comment character.

Consider this code-snippet ( MIT / X11 from the full source applies ):

// Return the next token of the line being scanned, or NULL when the
// line is exhausted or a '#' comment begins.
//
// linestart - the line to scan on the first call, NULL on later calls
//             for the same line.  It is handed straight to strtok(),
//             so the buffer is modified in place.
// lineend   - address of the line's terminating NUL as it was before
//             scanning began; nothing at or beyond it is patched.
//
// Plain tokens are separated by whitespace.  A token that begins with a
// dquote runs to the next dquote, spaces included.  It is returned WITH
// its opening dquote and WITHOUT its closing one, so the empty literal
// "" comes back as a string holding just one dquote.
char *get_token(char *linestart,char *lineend) {
   char *p,*q;
   p=strtok(linestart," \t\r\n");
   if(p==NULL)
      return NULL;
   if(*p=='#')
      return NULL;
   if(*p=='\"') {
      q=p;
      p+=strlen(q);
         // strtok() wrote a NUL over the delimiter that followed
         // the first word; put a space back so the quoted text is
         // contiguous again ( a tab delimiter comes back as a space ).
      if(p<lineend)
         *p=' ';
         // A lone dquote at the very end of the line opens nothing.
      if(q[1]==0)
         return NULL;
         // strtok() skips leading delimiters, so scanning from q+1
         // would step over the closing dquote of an empty literal
         // and swallow the text behind it.  Scan from the opening
         // dquote instead, disguised for a moment as an ordinary
         // character, then put it back.
      *q='x';
      p=strtok(q,"\"");
      *q='\"';
   }
   return p;
}

The above is my function that obeys my simple rules. The function accepts a starting position and an ending position. The starting position may be NULL as it is simply passed on to strtok(). The ending position is used when reconstructing part of the string that strtok() has overwritten.

Initially, get_token() scans the input string delimited by whitespace. If the result is NULL, that value is returned.

If the result begins with our comment character ‘#’, a NULL is returned, signaling the end of processing for that line.

If the first character of the result is a dquote, the NUL character that strtok() wrote over the delimiter is replaced with a space, provided that position lies before the original end of the line of text. get_token() needs the end parameter to ensure that the original end-of-line position is not exceeded.

Once reconstructed, a call to strtok() is again made, beginning at the character after the first dquote, using a dquote as the only separator character. If that result is not NULL, the previous position is returned to the caller which causes the first dquote to appear, but not the last.

Please consider this test script file:
script.txt

println "What is your name?"
# Read data into variable a
inputa

# Display a greeting.
println "Hello there, " a " ... "

# Implement our own "PAUSE" feature
println "Press ENTER to continue."
inputa

# execute something from the shell
println "Here's a list of your current directory,"
sys "dir /w"

# drop out
exit
println "You won't get here."

The following program will display the tokens for each line of text in the file specified on the command-line:

parser.c

// A parser for a small command-line interpreter.
//
// License: MIT / X11
// Copyright (c) 2010 by James K. Lawless
// jimbo@radiks.net http://www.radiks.net/~jimbo
// http://www.mailsend-online.com
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

#include <stdio.h>
#include <string.h>

char *get_token(char *linestart,char *lineend);

// Print every token of every line of the script named on the command
// line.  The tokens of each line are numbered from 1 and followed by
// a blank line.
//
// Returns 0 on success, 1 if no script was named or it cannot be opened.
int main(int argc,char **argv) {
   FILE *fp;
   char buff[1024];
   char *end;
   char *token;
   int i;
   if(argc<2) {
         // without this check argv[1] is NULL and fopen() misbehaves
      fprintf(stderr,"Usage: parser scriptfile\n");
      return 1;
   }
   fp=fopen(argv[1],"r");
   if(fp==NULL) {
      fprintf(stderr,"Cannot open input file %s\n",argv[1]);
      return 1;
   }
   while(fgets(buff,sizeof(buff),fp)!=NULL) {
         // Cut the line at its newline, if it has one.  The last
         // line of a file may lack one and must keep its final
         // character.
      buff[strcspn(buff,"\n")]=0;
      end=buff+strlen(buff);

      token=get_token(buff,end);
      for(i=1;token!=NULL;i++) {
         printf("%-3d %s\n",i,token);
         token=get_token(NULL,end);
      }
      printf("\n");
   }
   fclose(fp);
   return 0;
}

// Return the next token of the line being scanned, or NULL when the
// line is exhausted or a '#' comment begins.
//
// linestart - the line to scan on the first call, NULL on later calls
//             for the same line.  It is handed straight to strtok(),
//             so the buffer is modified in place.
// lineend   - address of the line's terminating NUL as it was before
//             scanning began; nothing at or beyond it is patched.
//
// Plain tokens are separated by whitespace.  A token that begins with a
// dquote runs to the next dquote, spaces included.  It is returned WITH
// its opening dquote and WITHOUT its closing one, so the empty literal
// "" comes back as a string holding just one dquote.
char *get_token(char *linestart,char *lineend) {
   char *p,*q;
   p=strtok(linestart," \t\r\n");
   if(p==NULL)
      return NULL;
   if(*p=='#')
      return NULL;
   if(*p=='\"') {
      q=p;
      p+=strlen(q);
         // strtok() wrote a NUL over the delimiter that followed
         // the first word; put a space back so the quoted text is
         // contiguous again ( a tab delimiter comes back as a space ).
      if(p<lineend)
         *p=' ';
         // A lone dquote at the very end of the line opens nothing.
      if(q[1]==0)
         return NULL;
         // strtok() skips leading delimiters, so scanning from q+1
         // would step over the closing dquote of an empty literal
         // and swallow the text behind it.  Scan from the opening
         // dquote instead, disguised for a moment as an ordinary
         // character, then put it back.
      *q='x';
      p=strtok(q,"\"");
      *q='\"';
   }
   return p;
}

After compilation, you may invoke the above program against the file script.txt by issuing the following command-line:

parser script.txt

The output should appear as follows:

1   println
2   "What is your name?

1   inputa

1   println
2   "Hello there,
3   a
4   " ... 

1   println
2   "Press ENTER to continue.

1   inputa

1   println
2   "Here's a list of your current directory,

1   sys
2   "dir /w

1   exit

1   println
2   "You won't get here.

Here’s a small command-line interpreter that allows for one variable (a) and understands the following commands:

  • println – Display any number of literal strings or the variable a followed by a newline on the output console
  • inputa – Retrieve a string from the console standard input device and leave it in variable a.
  • sys – Execute a command from the shell using the first argument token only. All other tokens are ignored.
  • exit – Exit the script

cli.c

// A small command-line interpreter.
//
// License: MIT / X11
// Copyright (c) 2010 by James K. Lawless
// jimbo@radiks.net http://www.radiks.net/~jimbo
// http://www.mailsend-online.com
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char *get_token(char *linestart,char *lineend);
char _var_a[256];

// Run the script named on the command line, one line at a time.
//
// The first token of each line selects the command:
//   inputa  - read a line from standard input into the variable a
//   println - print each remaining token ( a quoted literal without
//             its dquote, the word a as the variable's contents ),
//             then a newline
//   sys     - pass the first argument token to the command shell
//   exit    - stop running the script
// Lines with no token ( blank or comment lines ) are skipped.  Any
// other first token is reported on stderr and ends the run.
//
// Returns 0 after the script ends, 1 if no script was named or it
// cannot be opened.
int main(int argc,char **argv) {
   FILE *fp;
   char buff[1024];
   char *end;
   char *token;
   if(argc<2) {
         // without this check argv[1] is NULL and fopen() misbehaves
      fprintf(stderr,"Usage: cli scriptfile\n");
      return 1;
   }
   fp=fopen(argv[1],"r");
   if(fp==NULL) {
      fprintf(stderr,"Cannot open input file %s\n",argv[1]);
      return 1;
   }
   while(fgets(buff,sizeof(buff),fp)!=NULL) {
         // Cut the line at its newline, if it has one.  The last
         // line of a file may lack one and must keep its final
         // character.
      buff[strcspn(buff,"\n")]=0;
      end=buff+strlen(buff);

      token=get_token(buff,end);
      if(token==NULL)
         continue;
      if(!stricmp(token,"inputa")) {
         if((fgets(_var_a,sizeof(_var_a),stdin))!=NULL) {
               // same care as above: only a real newline is removed
            _var_a[strcspn(_var_a,"\n")]=0;
         }
         else {
               // end of input: the variable becomes empty
            *_var_a=0;
         }
      }
      else
      if(!stricmp(token,"println")) {
         for(;;) {
            token=get_token(NULL,end);
            if(token==NULL)
               break;
            if(*token=='\"')
                  // literals arrive with their opening dquote
               printf("%s",token+1);
            else
            if(!stricmp(token,"a"))
               printf("%s",_var_a);
            else
               printf("%s",token);
         }
         printf("\n");
      }
      else
      if(!stricmp(token,"exit")) {
         break;
      }
      else
      if(!stricmp(token,"sys")) {
         token=get_token(NULL,end);
         if(token!=NULL)
            system(token);
      }
      else {
         fprintf(stderr,"Unknown command %s\n",token);
         break;
      }
   }
   fclose(fp);
   return 0;
}

// Return the next token of the line being scanned, or NULL when the
// line is exhausted or a '#' comment begins.
//
// linestart - the line to scan on the first call, NULL on later calls
//             for the same line.  It is handed straight to strtok(),
//             so the buffer is modified in place.
// lineend   - address of the line's terminating NUL as it was before
//             scanning began; nothing at or beyond it is patched.
//
// Plain tokens are separated by whitespace.  A token that begins with a
// dquote runs to the next dquote, spaces included.  It is returned WITH
// its opening dquote and WITHOUT its closing one, so the empty literal
// "" comes back as a string holding just one dquote.
char *get_token(char *linestart,char *lineend) {
   char *p,*q;
   p=strtok(linestart," \t\r\n");
   if(p==NULL)
      return NULL;
   if(*p=='#')
      return NULL;
   if(*p=='\"') {
      q=p;
      p+=strlen(q);
         // strtok() wrote a NUL over the delimiter that followed
         // the first word; put a space back so the quoted text is
         // contiguous again ( a tab delimiter comes back as a space ).
      if(p<lineend)
         *p=' ';
         // A lone dquote at the very end of the line opens nothing.
      if(q[1]==0)
         return NULL;
         // strtok() skips leading delimiters, so scanning from q+1
         // would step over the closing dquote of an empty literal
         // and swallow the text behind it.  Scan from the opening
         // dquote instead, disguised for a moment as an ordinary
         // character, then put it back.
      *q='x';
      p=strtok(q,"\"");
      *q='\"';
   }
   return p;
}

To execute this program after compilation, enter the following:

cli script.txt

You should be prompted for your name. The script should then greet you. It should then wait for you to hit ENTER. Finally, it should display your current directory ( if you’re running in Windows. You might want to change the dir command to ls if you’re running in Linux. )

Note that since the script encounters an exit verb before the last line, the last println is never executed.

Blank lines and lines beginning with ‘#’ are automatically ignored.

The source code, sample script file, and Windows EXE files from this post can be found here:

http://www.mailsend-online.com/wp/cli.zip

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks


Unless otherwise noted, all code and text entries are Copyright © 2010 by James K. Lawless

→ Leave a CommentCategories: By Language · C · Compilers and Interpreters
Tagged: , , ,

Preserving my Favorite HN Links

December 27, 2009 · 3 Comments

One of my favorite web sites is the HackerNews / YCombinator site: http://news.ycombinator.com

The site name may be a bit misleading, as the term hacking relates more closely to being innovative than to the more negative connotations that have been associated with the word.

This particular site acts as a collector for links which may be of interest to the independent / start-up software author and/or business-owner. I like the site because of the mix of really good technology links and links about business strategies.

The most worthwhile part of the site for me is the area for comments about each submitted link. I find the conversations there very intelligent and worth reading. Often, I skip the original link and read the comments first.

The question came about today about the rise in posted links which deviate a bit from the site’s original themes. ( See: http://news.ycombinator.com/item?id=1016946 )

As the community grows, I’m sure that some links are very important to part of the HN members but not all.

I suggested that we might think about listing our own favorite links. As like-minded members of the HN community locate each other, it’s more likely that posts of interest won’t be missed as they scroll off to the bottom of the site’s list.

For a couple of years, I’ve kept a simple text file containing the original link with the HN link underneath, followed by some brief, descriptive commentary if necessary.

A sample of the file looks like this:

I wrote an AWK script that will format the above text file into a static HTML file.

hnfaves.awk

# Build static HTML page for HN favorites.
#
# License: MIT / X11
# Copyright (c) 2009 by James K. Lawless
# jimbo@radiks.net http://www.radiks.net/~jimbo
# http://www.mailsend-online.com
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

# Page header, written once before any input is read.
BEGIN {
   # <body> is opened here and closed again in END
   printf("<html><head><title>HN Faves</title></head><body>\n");
   printf("<font face=\"Comic Sans MS\">\n");
   printf("<h2>Favorite HN links for jim_lawless</h2>\n");
}
# An empty input line separates two entries.
NF==0 { printf("<p />") }
# Any other line: fields that start with "http" become links on a line
# of their own, all other fields are copied followed by a space.
NF>0 {
   for(i=1;i<=NF;i++) {
      if(substr($i,1,4)=="http") {
         printf("<a href=\"%s\">%s</a><br />\n",$i,$i);
      }
      else {
         printf("%s ",$i);
      }
   }
}
# Page footer, written once after the last input line.
END {
   printf("</font></body></html>\n");
}

I then constructed a script that will apply the AWK script against my text file of favorites producing an HTML file called hnfaves.htm

The script then FTP’s the favorites page to my web site here:
http://www.mailsend-online.com/hnfaves.htm

I’ll now link to this in my HN profile so that anyone interested can look at the bookmarks that I specifically list for the HN site.

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks


Unless otherwise noted, all code and text entries are Copyright © 2009 by James K. Lawless

→ 3 CommentsCategories: AWK / GAWK · By Language
Tagged: , , ,

FIF Isn’t Forth

December 12, 2009 · Leave a Comment

Several years ago, I created a Forth-like programming language interpreter called FIF ( FIF Isn’t Forth ). I put FIF aside for a while as other events began to consume my time. I ultimately ended up losing all of the source code that I had built to that point.

I had pondered trying to recreate it, but the work I had done in making FIF opened my eyes to things that I’d want to do differently. I had made FIF too lazily inefficient ( all types are strings, continuous interpretation of text, …etc.)

Although I don’t have the source code, I still have the documentation that I had created. I thought that I would combine the various HTML docs that I had written up into a single post here.

FIF History

I suppose the earliest influence on FIF would have been during the mid-80’s when I downloaded a free implementation of the Forth programming language for my 8-bit microcomputer. I went out and bought the book “Starting Forth” by Leo Brodie, having decided to learn a little about Forth.

The next several days were an absolute joy. The freeware Forth I’d downloaded was much more stable ( and much more fun ) than any of the BASIC / C / Pascal compilers that I’d paid for.

While I never became a rabid Forth programmer, I’d always maintained a fascination with the simplicity, flexibility, and expressive power available in such a small language implementation. Note: To categorize Forth as merely another programming language is a bit of an injustice. Forth can also be considered an entire operating system / operating environment, and interactive development environment.

Over the next twenty or so years, I used a number of programming languages in a professional capacity. Most of these languages stemmed from the C/C++ family.

While using these languages, I still read the occasional article on Forth.

By the mid-90’s, Forth coverage was scarce in the glossy trade magazines, but the Forth community remained alive and well on various Internet portals. The Usenet group comp.lang.forth served as a central message exchange area for those interested in Forth systems.

The comp.lang.forth community’s passion for Forth overwhelmed me. Although I often had little to add to the conversations, I lurked and read, and learned a lot from people who had a long professional history with Forth.

In the late 90’s, one of the programs I support professionally was experiencing difficulties interacting with a new piece of peripheral hardware. The application was written in C++. By this time, the C++ development toolset I used was too large to install on the customer’s PC so that I could debug the problem, so I did the next best thing; I implemented a short postfix language embedded directly into the application. It was very crude … simply an interface to the specific hardware control functions of the program … but it allowed me to call the functions interactively and allowed me to examine their results.

After about 15 minutes of tinkering with the user’s system via my postfix language, I was able to identify the incompatibility with the new hardware *and* was able to determine how to make it work within the application.

The speed at which I was able to diagnose and fix my application’s problem amazed me. I had only added about 50 lines of C++ code to my application to implement this little postfix language. Granted, it did not have any requisite safety checks or other niceties, but I found it plausible to implement a low-overhead programming language that could be embedded in an application in a modicum of code.

During this period, I was selling ( and continue to sell ) a series of niche applications. I had intended to add some form of scripting system to some of these applications. After the incident above, my attention turned to possible development of a postfix language as an embedded control language.

In the fall of 1999, I posted this message to comp.lang.forth:


Jim Lawless Oct 10 1999, 3:00 am
Newsgroups: comp.lang.forth
From: (Jim Lawless) - Find messages by this author
Date: 1999/10/10
Subject: Forth as a scripting / extension language

Greetings, all.

I'd be interested in hearing from anyone who has implemented
a Forth-like extension language to a given product ( such as
a word-processor or terminal program...)

How well was it accepted by the end-user? ( Did they whine and
ask for a BASIC-like language? )

Was the user a technician?

What were the pro's and con's of modeling the extension
language after Forth?

Thanks!

Jim Lawless

After a response from a prominent Forther, Stephen Pelc, I posted this follow-up message:


Jim Lawless Oct 11 1999, 3:00 am
Newsgroups: comp.lang.forth
From: (Jim Lawless)
Date: 1999/10/11
Subject: Re: Forth as a scripting / extension language

In article , s...@mpeltd.demon.co.uk
says...

:: We've done this many a time, ranging from a set up language for
:: an uninterruptible power supply (UPS), to configuration tools for
:: Windows applications.

Thank you for your input, Stephen. Perhaps I should
clarify my motives a bit.

I currently sell a few low-cost utility programs and
entertainment diversions. I'd like to devise ONE scripting
engine to use in all of my products.

Let's say for sake of example that I am producing yet another
Windows screen-saver authoring tool. ( Loaded question coming
up here...) How well do you think the average user of
entertainment software would gravitate to a Forth-like extension
language?

I have heard of several variations of C, BASIC, and LISP used
successfully as extension languages for end-users, but I don't
believe I've really ever heard of a system that specifically
provided a Forth-like extension language.

I understand that by its very nature, Forth provides the
facilities for self-scripting, but I'm curious as to the
acceptance of the language by the end-user.

Jim Lawless

The follow-up messages in the thread and other experiments led me to believe that a postfix language was likely the correct pursuit … but I exhaustively researched other embeddable scripting languages such as LUA, TCL, and Guile.

Shortly after posting this set of messages, my attention became a bit diverted … my time to apply to the postfix language project became scarce. So, the postfix language design stagnated.

You’ll note instances in Usenet history where I seemed to be dipping my toes in the postfix pond … reference to a small C-based stack-language was posted to comp.compilers … I built a scripting language known as Backlash as a Java applet and then rebuilt it as a pure client-side JavaScript system so that it was embeddable in your favorite browser.

Along the way, I found empirical evidence indicating that a number of other kindred spirits had constructed their own Forth-like extension languages:

  • Autodesk ATLAST ( Autodesk Threaded Language Application System Toolkit )
  • UNTIL ( UNconventional Threaded Interpretive Language ) from the book Write Your Own Programming Language Using C++
  • FICL ( Forth-Inspired Command Language )
  • pForth ( Phil Burk’s portable embeddable Forth in C )
  • PISTOL ( Portably Implemented Stack-Oriented Language ) Dr. Dobbs Journal, Feb ‘83
  • STOIC ( STring-Oriented Interactive Compiler )

I hope that FIF fulfills the following goals:

  • FIF should be approachable by non-programmers
  • FIF should allow users of my software to stretch said software beyond my original intentions
  • FIF should allow users of my software to control and manipulate critical aspects of the software
  • FIF should facilitate rapid-development
  • FIF should provide facilities to aid in debugging and long term maintenance of FIF code
  • FIF should be something that I can easily support as a software author
  • FIF should lend itself to a variety of applications

I hope you find FIF to be an interesting control system.

How FIF Differs from Forth

  • Overriding a FIF word changes *any* invocation of that word.
  • FIF does not emphasize knowledge of the machine as a processor would see it. Instead, FIF emphasizes the use of all string data.
  • FIF does not allow the creation of compile-time words.

FIF Semantics

FIF syntax is intentionally oversimplified.

A FIF program consists of a series of words.

A word is any series of characters that excludes the space-character.

Any number of spaces or empty lines can separate FIF words.

When FIF encounters a word beginning with a digit, it treats the word as a base-10 number and pushes the string representation of that number onto the stack.

When FIF encounters the word s”, it takes the remainder of characters ( after the separating space ) up to the next double-quotation-mark character ( ” ) and pushes that value as a string onto the stack.

When FIF encounters a system word or user-defined word, it executes the word.

A user may override the behavior of any FIF word, by providing their own implementation of the specific word. ( Flow-control and defining words may not be overridden. )

The following words require one or more words to appear to their right when executing ( these words may not be overridden ):

  • :
  • Var
  • Synonym
  • Include”
  • S”
  • See

FIF Internals

FIF code is interpreted on-the-fly until a defining-word is encountered. When the defining-word : is encountered, FIF begins compiling the remainder of words up to and including the next ; symbol into an internal program space for later execution.

A “dictionary” containing the word name, the word type ( system word, user-defined word, or variable ) is leveraged.

An array of variable descriptors is used to provide access to user-defined variables. Each descriptor contains a pointer and current maximum size of a variable. If the size of an existing variable is less than or equal to the size of an object which is to be assigned, the object is simply overlaid in the existing memory area. Otherwise, the existing memory area is freed and a new memory area is allocated to contain the new string object.

FIF employs garbage-collection on temporary strings by using a simple ring-buffer as a scratchpad. New strings are allocated at the tail end of the buffer. When we reach the end of the buffer, the pointer to the buffer is moved back to the beginning, consuming older temporary strings.

Permanent strings, such as those used in variable assignments, are allocated dynamically via the C function malloc().

The FIF Interface to C

void fif_register_word(char *w,char mytyp,long myexe,int ovrd);
Register a new FIF word. Intended for registering system words only by the external C program.

char *fif_pop();
Pop a string from the FIF data stack.

void fif_push(char *);
Push a string onto the FIF data stack.

void fif_eval(char *);
Evaluate (execute) a FIF snippet. Definitions may be included. If definitions are included in the EVAL’ed code, they will be compiled into an intermediate form and retained for later execution.

char *fif_newstring(char *);
Create a new permanent string.

void fif_err(char *,char *);
Issue an error message. The first string is intended to be an error message prefix. The second string is intended to be the object of the error.

void fif_init_words();
Initialize the FIF system.

void fif_interact();
Drop into an interactive console with FIF.

The source code for a simple FIF REPL shell is as follows:

#include <windows.h>
#include "fif.h"

extern int __argc;
extern char **__argv;

/*
int __stdcall WinMain (HINSTANCE hInstance, HINSTANCE hPrevInstance,
                    char * szCmdLine, int iCmdShow) {
*/

// Entry point of the FIF shell: initialize the FIF system, print the
// banner by evaluating FIF snippets, optionally load the script named
// as the first command-line argument, then drop into the interactive
// console.
//
// __argc / __argv are the runtime's copies of the command line; they
// are used instead of main()'s parameters so the same body also fits
// the WinMain entry point shown above.
// NOTE(review): fif_load() is not described in the interface list;
// presumably it loads and evaluates the named file - confirm in fif.h.
int main(void) {
   fif_init_words();
   fif_eval("s\" FIF v 4.0a\" . cr s\" Copyright 2005 by Jim Lawless\" . cr ");
   fif_eval("s\" (Enter BYE to exit FIF.\" .");
   fif_eval("s\" or enter FIF_SH (filename) to run a script.)\" . cr cr");
   if(__argc>1) {
      fif_load(__argv[1]);
   }
   fif_interact();
   return 0;
}

Note: The above can be compiled as a console or GUI app.

FIF Debugging

FIF debugging can currently be handled by invoking the TRACE word.

Once TRACE is invoked, you’ll see the names of all words as they execute.

In a future version of FIF, one will be able to define a special word that will be called by the tracing engine to allow one to homebrew their own tracer.

NOTRACE turns tracing off.

The FIF Core Word Set

Mathematics

*
Multiply top two items on stack. Result is left on stack.

+
Add top two items on stack. Result is left on stack.

-
Subtract top two items on stack. Result is left on stack.

/
Divide top two items on stack. Quotient is left on stack.

mod
Divide top two items on stack. Remainder is left on stack.

random
Consume top of stack item as upper limit and gen random non-negative integer under this limit.

Logic and Control-Flow

<
Is less than

=
Is equal to

>
Is greater than

>=
Is greater or equal to

<=
Is less than or equal to

!=
Is not equal to

if
Consume top item from stack. If item is zero, take ELSE or ENDIF branch. Otherwise, follow sequential flow until counterpart ELSE or ENDIF is encountered.

else
See use above with IF

endif
See use above with IF

begin
Define beginning area for loop construct with UNTIL

until
Consume top item from stack. If zero, repeat at word after counterpart BEGIN. Otherwise, continue processing words sequentially.

File I/O

Do not use File I/O!!! These words have not been debugged.

fgetc

fgets

fopen

fputs

fputc

fclose

Console I/O

input
Input a line from the console and leave resulting string on top of stack.

.
Consume top of stack and display on console.

emit
Consume top of stack as a number. Display single character representation of number on console.

cr
Emit a newline

Stack Manipulation

dup
Duplicate the top item on stack non-destructively

rot
Move third item on stack to the top of the stack.

swap
Swap top two stack items.

drop
Remove top item from stack.

String Manipulation

chrtonum

tolower

toupper

substr

strcmp

strlen

stricmp

numtochr

concat

FIF Common Programming Constructs
(
Begin comment. End with ) character.

:
Begin definition. End with exit ; character.

;
End : definition and/or exit.

include”
Include filename on the right of the word INCLUDE” after whitespace. This word appends a file to the active work buffer, then EVAL’s it. I do not yet utilize a mechanism to prevent redundant includes.

synonym
Does not consume a stack item.
synonym old-word new-word
…causes the definition of a new word that points to the definition of an existing word. Useful when overriding an existing word. Can be used to invoke original functionality as needed when overriding a word.

see
Interactive.
see word-name
Show the code comprising a word’s definition if it is a user-defined word.

trace
Turn debug-tracing on.

notrace
Turn debug-tracing off

var
Define a variable.
var varname
…creates a new dictionary entry for the variable and assigns an index into the list of all variables.

set
Consumes top two stack items … one of which is a variable index and sets the variable value.

get
Consumes top stack item as variable index and leaves the current value of variable on the top of stack.

eval

Miscellaneous Words

bye
Exit FIF

words
List words to console.

sleep
Consume top stack item as a number of milliseconds and sleep for that interval.

msgbox
Display messagebox. More info later.

millitime
Get time in milliseconds and leave on top of stack.

system
Consumes top of stack as a string and executes it under the current command shell.

Sample FIF Code Snippets

Hello, world!

s" Hello, world!" . cr
Hello, world! in a message box
0 s" Title" s" Hello, world!" msgbox

Add and display 5 and 6

5 6 + .

Display 10 Random Integers, under 100 to the console.

( Test random number generation )
( Loop 10 times. Each pass shows the counter and a random non-negative integer below 100 )
: testrnd
   1 ( put counter on top of stack )
   begin
      dup . s" " .   ( show a copy of the counter, then a space )
      100 random . cr   ( 100 is the upper limit for random. show the result and end the line )
      1 +   ( inc counter )
      dup   ( copy the counter for the comparison below )
   10 > until   ( go back to BEGIN until the counter is greater than 10 )
   drop ;   ( remove the counter that is still on the stack )
testrnd

Interact with user and see if the entered number is odd or even. Then, list numbers 1 through 10 and display whether they are odd or even.

( Ask the user to enter a number and tell them
  if it's odd or even. Then, list the first 10 positive
  integers and determine if they're odd or even. )

      ( define word well? that displays either "odd" or "even"
      depending on whether the number at the top-of-the stack
      is odd or even )

   : well? dup ( duplicate top stack item for later display )
      2 mod    ( divide by 2 leaving the remainder 1=odd 0=even )
      swap     ( swap original number and truth-value from mod )
      .        ( display original number )
      s"  is " . ( display literal )
      if       ( consume the remainder. zero takes the ELSE branch, anything else falls through )
        s" odd." .
      else
        s" even." .
      endif
      cr       ( end the output line )
    ; (  end of word )

   : main
      ( display the message )
   s" Enter a non-negative number." . cr
      ( input the number )
   input
   well? ( test the number and display result )
      ( Now see how 1-10 work )
   cr s" Testing numbers 1 through 10..." . cr
   1  ( push 1 )
   begin
      dup      ( copy the counter because well? consumes its argument )
      well?
      1 + dup  ( inc counter and copy it for the comparison below )
   10 > until  ( go back to BEGIN until the counter is greater than 10 )
   drop ;      ( remove the counter that is still on the stack )
main

Display the lower-case alphabet

  ( Write the characters from "a" through "z" to the console )
  : main
  s" a" chrtonum ( get ASCII value of 'a' onto TOS )
  begin
     dup  ( duplicate current value )
     emit ( write the character to the console )
     1 +  ( increment to next letter )
     dup  ( dup it for comparison )
     s" z" chrtonum ( compare greater than "z" )
     >
  until   ( go back to BEGIN until the next value is greater than that of "z" )
  drop ;  ( remove the value that is still on the stack )
main

Display the upper-case alphabet by tricking-out the emit word.

  ( Define new "emit" that only displays in upper case )
  ( Overriding a word changes every invocation of it, so main below needs no change )
  : emit numtochr toupper . ;
  : main
  s" a" chrtonum ( get ASCII value of 'a' onto TOS )
  begin
     dup  ( duplicate current value )
     emit ( write the character to the console )
     1 +  ( increment to next letter )
     dup  ( dup it for comparison )
     s" z" chrtonum ( compare greater than "z" )
     >
  until   ( go back to BEGIN until the next value is greater than that of "z" )
  drop ;  ( remove the value that is still on the stack )
main

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks

Unless otherwise noted, all code and text entries are Copyright © 2009 by James K. Lawless.

→ Leave a CommentCategories: C · Compilers and Interpreters · Forth · Nonconformity
Tagged: , , ,

Invoking the Default Windows Screen-Saver

December 9, 2009 · Leave a Comment

Several weeks ago, I posted code for a utility that prevents a Windows screen-saver from firing by simulating mouse motion ( http://jimlawless.wordpress.com/2009/10/19/preventing-windows-screen-saver-activation/ ).

Today, I’m providing a short WSH JavaScript script that will invoke the default screen-saver. I noted a search for similar functionality that led to my blog. This post will be included in the “By Popular Demand” category of items requested via a search-engine.

runsaver.js

// Launch the default screen-saver
//
// License: MIT / X11
// Copyright (c) 2009 by James K. Lawless
// jimbo@radiks.net http://www.radiks.net/~jimbo
// http://www.mailsend-online.com
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

   // Read the screen-saver executable registered for the .DEFAULT user
   // profile and launch it with the /s switch.
   var shl=new ActiveXObject("WScript.Shell");
   var ss="";
   try {
      ss=shl.RegRead(
         "HKEY_USERS\\.DEFAULT\\Control Panel\\Desktop\\SCRNSAVE.EXE");
   }
   catch(e) {
      // RegRead raises an error when the value does not exist, which is
      // the case when no screen-saver has been configured.
      ss="";
   }
   if(!ss) {
      WScript.Echo("No default screen-saver is configured.");
      WScript.Quit(1);
   }
      // Quote the path when it contains a space so that Exec does not split
      // it into a program name and stray arguments.
   if(ss.indexOf(" ")>=0 && ss.charAt(0)!="\"")
      ss="\"" + ss + "\"";
      // Let's wait one second to make sure all fingers are off the keyboard and no further
      // mouse motion is happening...
   WScript.Sleep(1000);
      // Run the screen-saver
   shl.Exec(ss + " /s");

To invoke this script, enter either of the following two command lines:

cscript runsaver.js

wscript runsaver.js

CScript will run the console version of the WSH interpreter engine; WScript will run the GUI version and will not display a console window.

You may create a shortcut to this script by using wscript.exe as the program to run and runsaver.js as the sole parameter. You might need to specify a full path for the runsaver.js script in the shortcut.

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks

Unless otherwise noted, all code and text entries are Copyright © 2009 by James K. Lawless.

→ Leave a Comment · Categories: By Popular Demand · Javascript
Tagged: , , ,

Locking a Windows Session

December 1, 2009 · Leave a Comment

Several years ago, a friend and I discussed the idea of a program that would, after a period of inactivity, do the equivalent of locking one’s Windows session by pressing CTRL-ALT-DEL and then clicking the appropriate “lock” button. The screen-saver was supposed to fulfill this role, but the concern was that someone could disable their screen-saver.

I did a little investigation into the matter at the time and could not find a way to lock the session from a program. Years later, a function (LockWorkStation) was added to user32.dll that provides this functionality. The C program below looks up this function in User32 dynamically at run time and then invokes it. If the function is not found, an error message will be displayed.

lock.c

// Lock a Windows session.
//
// License: MIT / X11
// Copyright (c) 2009 by James K. Lawless
// jimbo@radiks.net http://www.radiks.net/~jimbo
// http://www.mailsend-online.com
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

#include <windows.h>

#pragma comment(lib,"user32.lib")

/*
 * Lock the current Windows session.
 *
 * LockWorkStation is looked up in user32.dll at run time rather than being
 * called directly, so the program can report its absence with a message box
 * instead of failing to start.
 *
 * Returns 0 when the lock request was issued, 1 on any failure (user32.dll
 * could not be loaded, LockWorkStation is missing, or the call failed).
 */
int main(int argc,char **argv) {
   /* Signature of LockWorkStation as exported by user32.dll. */
   typedef BOOL (WINAPI *lock_fn)(void);
   HMODULE user32;
   lock_fn lockWorkStation;
   int rc=1;

   (void)argc;
   (void)argv;

   /* The A variants are named explicitly because the string literals here */
   /* are narrow; the generic macros would not compile with UNICODE set.   */
   user32=LoadLibraryA("user32.dll");
   if(user32==NULL) {
      MessageBoxA(NULL,
         "Unable to load User32",
         "",MB_OK);
      return rc;
   }

   lockWorkStation=(lock_fn)GetProcAddress(user32,"LockWorkStation");
   if(lockWorkStation==NULL) {
      MessageBoxA(NULL,
         "You don't have LockWorkStation in User32",
         "",MB_OK);
   }
   else if(!lockWorkStation()) {
      /* A zero return means the lock request was rejected. */
      MessageBoxA(NULL,
         "LockWorkStation failed",
         "",MB_OK);
   }
   else {
      rc=0;
   }

   FreeLibrary(user32);
   return rc;
}

The source and sample executable file for lock can be downloaded in a single archive at:
http://www.mailsend-online.com/wp/lock.zip

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks


Unless otherwise noted, all code and text entries are Copyright © 2009 by James K. Lawless.

→ Leave a Comment · Categories: C
Tagged: ,