Direct Threaded Daydreams

A Simple Associative Array Library in C

November 18, 2009 · Leave a Comment

I often need to use associative arrays when writing C code. You might be more familiar with these constructs under other names ( dictionaries, maps, hashmaps, hashes, …etc. ) I will use the term map in place of the words associative array.

A map is a collection of key / value pairs or name / value pairs. My implementation of a map structure allows strings as the only data that can be stored in the name and value.

Many implementations of maps use a hashed lookup to speed up execution. I have opted to use a simple forward-chained linked-list of map structures. When searching for an item in a map, the search will sequentially pass over every item in the list of map entries until a match is found. If no match is found, an empty string is returned.

This implementation is complete enough for my needs.

map_lib.h

// map_lib
// A simple associative-array library for C
//
// License: MIT / X11
// Copyright (c) 2009 by James K. Lawless
// jimbo@radiks.net http://www.radiks.net/~jimbo
// http://www.mailsend-online.com
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

#ifndef MAP_LIB_H
#define MAP_LIB_H

// One name/value entry of a map. Entries are chained through nxt into a
// singly linked list, and the first entry doubles as the handle for the
// whole map.
struct map_t {
   struct map_t *nxt;   // next entry in the chain, or NULL at the tail
   char *name;          // key string; NULL on a map that has had nothing stored yet
   char *value;         // value string stored under name
} ;

// Allocate a new map that holds no name/value pair yet.
struct map_t *map_create();
// Store a copy of value under name in map m. Names are compared with
// stricmp(), so an existing entry whose name differs only by case has its
// value replaced; otherwise a new entry is added at the end of the chain.
void map_set(struct map_t *m,char *name,char *value);
// Return the value stored under name in map m, or the empty string ""
// when no entry matches. A found value points at the map's own copy.
char *map_get(struct map_t *m,char *name);

#endif

map_lib.c

// map_lib
// A simple associative-array library for C
//
// License: MIT / X11
// Copyright (c) 2009 by James K. Lawless
// jimbo@radiks.net http://www.radiks.net/~jimbo
// http://www.mailsend-online.com
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

#include <stdio.h>
#include <malloc.h>
#include <string.h>
#include "map_lib.h"

// Allocate a new, empty map.
//
// The returned entry is the head of the list and has no name or value
// until the first map_set() call fills it in.
//
// Returns the new map, or NULL if the allocation fails.
struct map_t *map_create() {
   struct map_t *m;

   m=malloc(sizeof *m);
   if(m==NULL) {
      return NULL;
   }
   m->name=NULL;
   m->value=NULL;
   m->nxt=NULL;
   // The original fell off the end without returning m, handing callers
   // an indeterminate pointer.
   return m;
}

// Return a freshly malloc'ed copy of s, or NULL if the allocation fails.
static char *map_copy_string(const char *s) {
   size_t len=strlen(s)+1;
   char *copy=malloc(len);

   if(copy!=NULL) {
      memcpy(copy,s,len);
   }
   return copy;
}

// Store a copy of value under name in map m.
//
// Names are compared case-insensitively with stricmp(). If an entry with
// the same name already exists its value is replaced; otherwise the pair
// is stored in the (still unused) head entry or appended to the end of
// the chain.
//
// The function returns nothing, so failures are silent: a NULL argument
// or a failed allocation leaves the map exactly as it was.
void map_set(struct map_t *m,char *name,char *value) {
   struct map_t *map;
   struct map_t *last;
   char *new_name;
   char *new_value;

   if(m==NULL || name==NULL || value==NULL) {
      return;
   }

   // Allocate the replacement first so an out-of-memory condition can
   // never leave an entry pointing at freed storage.
   new_value=map_copy_string(value);
   if(new_value==NULL) {
      return;
   }

   // Pass 1: replace the value of an existing entry.
   last=m;
   for(map=m;map!=NULL;map=map->nxt) {
      if(map->name!=NULL && !stricmp(name,map->name)) {
         free(map->value);
         map->value=new_value;
         return;
      }
      last=map;
   }

   // Not present: the name has to be copied as well.
   new_name=map_copy_string(name);
   if(new_name==NULL) {
      free(new_value);
      return;
   }

   // A map fresh from map_create() has an unused head entry; fill it
   // instead of growing the chain.
   if(m->name==NULL) {
      m->name=new_name;
      m->value=new_value;
      return;
   }

   // Otherwise append a new entry after the current tail.
   map=malloc(sizeof *map);
   if(map==NULL) {
      free(new_name);
      free(new_value);
      return;
   }
   map->name=new_name;
   map->value=new_value;
   map->nxt=NULL;
   last->nxt=map;
}

// Look up name in map m.
//
// Names are compared case-insensitively with stricmp(). Returns the
// stored value (a pointer into the map, not a copy) or the empty string
// "" when no entry matches, when m is NULL, or when name is NULL.
char *map_get(struct map_t *m,char *name) {
   struct map_t *map;

   if(name==NULL) {
      return "";
   }
   for(map=m;map!=NULL;map=map->nxt) {
      // The head entry of a map that has never been set has a NULL
      // name; passing that to stricmp() would dereference NULL.
      if(map->name!=NULL && !stricmp(name,map->name)) {
         return map->value;
      }
   }
   return "";
}

Here’s a simple test program that loads and displays a map with name/value pairs. The names are all numbers. The values are words that sound like the numbers ( homophones ).

map_stuff.c

#include <stdio.h>
#include "map_lib.h"

void display_both(struct map_t *m,char *key);

// Exercise the map library: store three pairs, show them out of order,
// overwrite one, show them again, then look up a key before and after it
// is added.
int main(int argc,char **argv) {
   char *display_order[]={"Two","Four","One"};
   struct map_t *numbers=map_create();
   int pass;
   int i;

   map_set(numbers,"One","Won");
   map_set(numbers,"Two","Too");
   map_set(numbers,"Four","Fore");

   // Pass 0 shows the initial values; pass 1 first resets an existing
   // entry and then shows the same keys again.
   for(pass=0;pass<2;pass++) {
      if(pass==1) {
         map_set(numbers,"Two","To");
      }
      for(i=0;i<3;i++) {
         display_both(numbers,display_order[i]);
      }
      printf("\n");
   }

   // A key that is not in the map yet prints with an empty value.
   display_both(numbers,"Eight");

   map_set(numbers,"Eight","Ate");

   printf("\n");

   display_both(numbers,"Eight");
}

// Print the key and the value the map holds for it on one line,
// separated by a space.
void display_both(struct map_t *m,char *first) {
   char *found=map_get(m,first);

   printf("%s %s\n",first,found);
}

The output from the above program is as follows:

Two Too
Four Fore
One Won

Two To
Four Fore
One Won

Eight 

Eight Ate

I did not provide any kind of iterator functions in the library. If I need to iterate through a given map, I will just use a for-loop and will use the ->nxt element to reach each successive entry.

I will use the above library in later posts.

The source and sample executable file for map_lib can be downloaded in a single archive at:
http://www.mailsend-online.com/wp/map_lib.zip

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks


Unless otherwise noted, all code and text entries are Copyright © 2009 by James K. Lawless

→ Leave a CommentCategories: C
Tagged: , ,

Extracting URL Addresses from Text in C

November 16, 2009 · Leave a Comment

A question arose in a forum that I recently read asking how one might most efficiently ( in terms of both processing time and memory use ) extract URLs from text.

I wrote the following program as an example of the approach I’d probably take.

extract_url.c

// extract_url
// Extract http and https URL's from text.
//
// License: MIT / X11
// Copyright (c) 2009 by James K. Lawless
// jimbo@radiks.net http://www.radiks.net/~jimbo
// http://www.mailsend-online.com
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

// Scanner states. State 0 (no macro) means "not inside a URL"; each of
// the states below records the last scheme character matched so far.
#define S_h     (1)   // seen 'h'
#define S_t1    (2)   // seen "ht"
#define S_t2    (3)   // seen "htt"
#define S_p     (4)   // seen "http"
#define S_s     (5)   // seen "https"
#define S_col  (6)    // seen the ':' after http/https; collecting URL characters

   // Characters that may continue a URL once the colon has been seen.
   // Lower-case alpha only.  tolower() will be used
   // when searching for legal characters.
   // NOTE(review): ':' '@' ',' and '!' are not in this set, so a URL is
   // cut at e.g. a port number ("host:8080") -- confirm whether intended.
char *legal_chars =
   "abcdefghijklmnopqrstuvwxyz0123456789"
   "./\\~#%&()_-+=;?";

// Current scanner state; print_urls() resets it to 0 at the start of
// every call.
int _state;

// Print every http/https URL found in the given string to stdout.
void print_urls(char *);

// Read standard input in chunks of at most 1022 characters and print the
// URLs found in each chunk.
int main(int argc,char **argv) {
   char line[1024];

   for(;;) {
      if(fgets(line,1023,stdin)==NULL) {
         break;
      }
      print_urls(line);
   }
}

// Write the characters in [from, to) to stdout, followed by a newline.
static void print_url_span(const char *from,const char *to) {
   while(from<to) {
      fputc(*from,stdout);
      from++;
   }
   fputc('\n',stdout);
}

// Scan the NUL-terminated string s and print each http/https URL it
// contains on its own line.
//
// The scanner is a small state machine driven by the global _state: it
// matches the literal (lower-case) prefix "http:" or "https:" one
// character at a time, remembers where the prefix started in mark, and
// then keeps consuming characters for as long as they appear in
// legal_chars. The URL is printed when the first illegal character, or
// the end of the string, is reached.
void print_urls(char *s) {
   char *p;
   char *mark=NULL;   // start of the candidate URL; set when 'h' is seen

   _state=0;
   for(p=s;*p;p++) {
      switch(_state) {
         case 0:
            if(*p=='h') {
               _state=S_h;
               mark=p;
            }
            break;
         case S_h:
            if(*p=='t') {
               _state=S_t1;
            } else {
               // Mismatch: give the character back so it is examined
               // again in state 0. Without this, the second 'h' of
               // "hhttp://..." is swallowed and the URL is missed.
               _state=0;
               p--;
            }
            break;
         case S_t1:
            if(*p=='t') {
               _state=S_t2;
            } else {
               _state=0;
               p--;
            }
            break;
         case S_t2:
            if(*p=='p') {
               _state=S_p;
            } else {
               _state=0;
               p--;
            }
            break;
         case S_p:
            if(*p==':') {
               _state=S_col;
            } else if(*p=='s') {
               _state=S_s;
            } else {
               _state=0;
               p--;
            }
            break;
         case S_s:
            if(*p==':') {
               _state=S_col;
            } else {
               _state=0;
               p--;
            }
            break;
         case S_col:
            // The cast keeps tolower() defined for characters with the
            // high bit set when plain char is signed.
            if(strchr(legal_chars,tolower((unsigned char)*p))==NULL) {
               print_url_span(mark,p);
               _state=0;
               p--; // backtrack
            }
            break;
         default:
            _state=0;
            break;
      }
   }
   // Flush a URL that runs up to the end of the string. Only a complete
   // "http:"/"https:" prefix counts; a partial match such as a trailing
   // "htt" is not a URL and is no longer printed.
   if(_state==S_col) {
      print_url_span(mark,p);
   }
}

When building hand-coded lexers, I generally create a map of states for each token and keep incrementing the state to the next legal token-character state until the input character is no longer valid. At that point, my lexer will do something with the characters that had been collected to that point.

In the case of an HTTP(S) URL processor, I defined seven possible states:

    Zero – Starting state. Treat the input character as the first character and set the variable mark to refer to this position in the input line.
    One – We have an ‘h’ from http or https.
    Two – We have the first ‘t’ from http or https.
    Three – We have the second ‘t’ from http or https.
    Four – We have the ‘p’ from http or https.
    Five – We have the ‘s’ from https.
    Six – We have the colon-character from http or https.

After the input state has reached six, the code looks in the string legal_chars to determine if it should keep tracking valid URL characters. ( I may have missed a few. You might need to add or remove some from this string in order for the code to work properly.)

Once an invalid character or EOF is reached, the code outputs the URL starting from the variable mark up to ( but not including ) the current input character. The state is then reset to zero and the current character is returned to the input loop for first-time processing.

To execute extract_url on a text file, you should issue a command-line similar to the following:

extract_url < tmp.txt

The source and EXE files for extract_url can be found here:
http://www.mailsend-online.com/wp/extract_url.zip

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks


Unless otherwise noted, all code and text entries are Copyright © 2009 by James K. Lawless

→ Leave a CommentCategories: C
Tagged:

An Embedded Mini-Interpreter

November 15, 2009 · Leave a Comment

Several years ago, when discussing software protection schemes others had spoken of dynamically modifying code itself on the fly. In years past, this technique may not have been cause for concern, but the fact that the processor cache may pre-load blocks of code left me curious as to how one would flush the cache and reload the altered code. I later found some Windows API functions that helped in this endeavor, but I pondered a slightly different route to accomplish lower-level code obfuscation.

My solution was certainly influenced by the September 1989 Dr. Dobbs Journal article Roll Your Own Minilanguages With Mini-Interpreters by Michael Abrash and Dan Illowsky. You can read this article here: http://www.ddj.com/article/printableArticle.jhtml?articleID=184408206.

The article presents an interpreter written in assembly language that uses the assembler’s ability to define pointers to implement a simple interpreter. I took a similar approach and defined a mini-interpreter in C that consisted of ten very specific instructions. My interpreter did not use function-pointers; I chose to use a switch/case construct so that the code could more easily be ported to other languages.

My goal was to be able to first create some sort of sample program that could be broken down into a handful of core, top-level functions. These core functions and a few supplemental functions would comprise the instruction set for the mini-interpreter. By encoding the instructions and their parameters into an array of integers, I could then save the array to the filesystem. Using this technique, I could use a C program that could create any external file for the mini-interpreter that I wanted. Once externalized, these files could be encrypted and / or obfuscated. My hope was that standard techniques to trace 80×86 object code would be of little value during the execution of these pseudo-instructions.

Consider the code for file game1.c

// License: MIT / X11
// Copyright (c) 2009 by James K. Lawless
// jimbo@radiks.net http://www.radiks.net/~jimbo
// http://www.mailsend-online.com
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>

// Game state shared by the functions below.
int howLarge;    // upper bound for the secret number; set by askHowLarge()
int theNumber;   // the secret number; chosen by genRandomNumber()
int tries;       // guesses made so far; incremented by playTheGame(), zeroed by clearCounter()

// One function per top-level step of the game.
void clearCounter();
void genRandomNumber();
void askHowLarge();
void playTheGame();
void howDidTheUserDo();
void wannaPlayAgain();

// Registered game: ask for the range once, then play rounds until
// wannaPlayAgain() terminates the program.
int main(void) {
   printf("Registered version.\n");
   askHowLarge();
   for(;;) {
      // Start every round from zero; without this the try count
      // reported by howDidTheUserDo() accumulates across rounds.
      clearCounter();
      genRandomNumber();
      playTheGame();
      howDidTheUserDo();
      wannaPlayAgain();
   }
}

// Reset the guess counter to zero.
void clearCounter() {
   tries = 0;
}

// Pick the secret number, storing a value in the range 1..howLarge in
// theNumber.
//
// The generator is seeded from the clock once, on the first call.
// Re-seeding on every call would repeat the same number whenever two
// rounds start within the same second.
//
// A howLarge below 1 is treated as 1, which avoids the division by zero
// that rand() % 0 would cause.
void genRandomNumber()
{
   static int seeded=0;
   int range;

   if(!seeded) {
      srand((unsigned int) time(NULL));
      seeded=1;
   }
   range=(howLarge>0) ? howLarge : 1;
   theNumber=( rand() % range ) + 1;
}

// Ask the player for the largest number to think of and store it in
// howLarge.
//
// The question is repeated until a whole number greater than zero is
// entered, so the later rand() % howLarge can never divide by zero.
// If standard input ends, the program exits, because no answer can
// arrive any more.
void askHowLarge()
{
   char buff[12];

   for(;;) {
      printf("What's the largest number I should think of?");
      if(fgets(buff,11,stdin)==NULL) {
         // End of input or read error: buff holds nothing usable.
         exit(0);
      }
      howLarge=atoi(buff);
      if(howLarge>0) {
         return;
      }
      printf("Please enter a whole number greater than zero.\n");
   }
}

// Play one round: keep asking for guesses until the player enters
// theNumber, answering "Too low." or "Too high." after each miss.
// Every guess read increments tries.
//
// If standard input ends before the number is guessed, the program
// exits; the original kept re-testing a stale buffer and looped forever.
void playTheGame()
{
   char buff[12];
   int num;

   for(;;) {
      printf("Take a guess: what number am I thinking of? ");
      if(fgets(buff,11,stdin)==NULL) {
         printf("\n");
         exit(0);
      }
      tries++;
      num=atoi(buff);
      if( num == theNumber ) {
         printf("Correct!\n");
         return;
      }
      if( num < theNumber )
         printf("Too low.\n");
      else
         printf("Too high.\n");
   }
}

// Report how many guesses the round took, using the singular "try" for
// exactly one guess and "tries" otherwise.
void howDidTheUserDo()
{
   const char *noun;

   if( tries == 1 ) {
      noun="try.";
   } else {
      noun="tries.";
   }
   printf("It took you %d %s\n",tries,noun);
}

// Ask whether to play another round. Any answer that does not start
// with 'y' or 'Y', and also end of input, terminates the program.
void wannaPlayAgain()
{
   char buff[5];
   int ch;

   printf("Would you like to play again?");
   if(fgets(buff,4,stdin)==NULL) {
      // Nothing was read, so buff is not valid to inspect.
      exit(0);
   }
   // An answer such as "yes" does not fit in the three characters read
   // above. Discard the rest of the line so the leftover newline is not
   // taken as the first guess of the next round.
   if(strchr(buff,'\n')==NULL) {
      while((ch=getchar())!='\n' && ch!=EOF) {
         // skip
      }
   }
   if( (*buff != 'y')&&(*buff!='Y')) {
      exit(0);
   }
}

game1.c is intended to model a fully functional registered copy of a game where the computer chooses a random number and allows the user to continue entering guesses until the number is reached. With each incorrect answer, the program will respond with “Too high.” or “Too low.”

The main() function issues calls to a series of other functions. While each of these functions will intentionally be used as an instruction in the mini-interpreter, the program will need a few extra support functions ( such as a function that can display a string ). We’ll also need a primitive flow-control instruction that the game can repeat forever. Finally, we’ll add an instruction that will terminate execution.

Embedding the Interpreter

In the source for gameinterpreter.c, the top-level functions from game1.c have been kept and the support functions have been added. These functions have been separated into a library that will be used in the remaining programs.

gameinterpreter.c

// License: MIT / X11
// Copyright (c) 2009 by James K. Lawless
// jimbo@radiks.net http://www.radiks.net/~jimbo
// http://www.mailsend-online.com
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include "gameinterpreter.h"

// Game state, private to this file.
static int howLarge;    // upper bound for the secret number
static int theNumber;   // the secret number of the current round
static int tries;       // guesses made so far
static int insp;   // virtual instruction pointer
static int labels[10];  // branch targets recorded by SET_LABEL, indexed by label number
extern int pcode[300];  // the p-code program; defined by the program that links this file

// Forward declarations. Each one carries the same storage class as its
// definition below: declaring a function without static and then
// defining it static is a constraint violation that compilers reject.
static void clearCounter();
static void genRandomNumber();
static void askHowLarge();
void playTheGame();
static void howDidTheUserDo();
static void wannaPlayAgain();
static void branch();
static void printString();
static void setLabel();
static void end();

// CLEAR_COUNTER: reset the guess counter to zero.
static void clearCounter() {
   tries = 0;
}

// GEN_RANDOM_NUMBER: pick the secret number, storing a value in the
// range 1..howLarge in theNumber.
//
// The generator is seeded from the clock once, on the first call.
// Re-seeding on every call would repeat the same number whenever two
// rounds start within the same second.
//
// A howLarge below 1 is treated as 1, which avoids the division by zero
// that rand() % 0 would cause.
static void genRandomNumber()
{
   static int seeded=0;
   int range;

   if(!seeded) {
      srand((unsigned int) time(NULL));
      seeded=1;
   }
   range=(howLarge>0) ? howLarge : 1;
   theNumber=( rand() % range ) + 1;
}

// ASK_HOW_LARGE: ask the player for the largest number to think of and
// store it in howLarge.
//
// The question is repeated until a whole number greater than zero is
// entered, so the later rand() % howLarge can never divide by zero.
// If standard input ends, the program exits, because no answer can
// arrive any more.
static void askHowLarge()
{
   char buff[12];

   for(;;) {
      printf("What's the largest number I should think of?");
      if(fgets(buff,11,stdin)==NULL) {
         // End of input or read error: buff holds nothing usable.
         exit(0);
      }
      howLarge=atoi(buff);
      if(howLarge>0) {
         return;
      }
      printf("Please enter a whole number greater than zero.\n");
   }
}

// PLAY_THE_GAME: play one round. Keep asking for guesses until the
// player enters theNumber, answering "Too low." or "Too high." after
// each miss. Every guess read increments tries.
//
// If standard input ends before the number is guessed, the program
// exits; the original kept re-testing a stale buffer and looped forever.
void playTheGame()
{
   char buff[12];
   int num;

   for(;;) {
      printf("Take a guess: what number am I thinking of? ");
      if(fgets(buff,11,stdin)==NULL) {
         printf("\n");
         exit(0);
      }
      tries++;
      num=atoi(buff);
      if( num == theNumber ) {
         printf("Correct!\n");
         return;
      }
      if( num < theNumber )
         printf("Too low.\n");
      else
         printf("Too high.\n");
   }
}

// HOW_DID_THE_USER_DO: report how many guesses the round took, using the
// singular "try" for exactly one guess and "tries" otherwise.
static void howDidTheUserDo()
{
   const char *noun;

   if( tries == 1 ) {
      noun="try.";
   } else {
      noun="tries.";
   }
   printf("It took you %d %s\n",tries,noun);
}

// WANNA_PLAY_AGAIN: ask whether to play another round. Any answer that
// does not start with 'y' or 'Y', and also end of input, terminates the
// program.
static void wannaPlayAgain()
{
   char buff[5];
   int ch;

   printf("Would you like to play again?");
   if(fgets(buff,4,stdin)==NULL) {
      // Nothing was read, so buff is not valid to inspect.
      exit(0);
   }
   // An answer such as "yes" does not fit in the three characters read
   // above. Discard the rest of the line so the leftover newline is not
   // taken as the first guess of the next round.
   if(strchr(buff,'\n')==NULL) {
      while((ch=getchar())!='\n' && ch!=EOF) {
         // skip
      }
   }
   if( (*buff != 'y')&&(*buff!='Y')) {
      exit(0);
   }
}

// BRANCH: read the label number that follows the opcode and move the
// virtual instruction pointer to the position recorded for that label.
//
// The p-code may come from an external file, so the operand is checked
// before it is used as an index; a missing operand or a label number
// outside the labels table stops the program with an error.
static void branch()
{
   const int pcode_len=(int)(sizeof pcode/sizeof pcode[0]);
   const int label_count=(int)(sizeof labels/sizeof labels[0]);
   int label;

   if(insp+1>=pcode_len) {
      fprintf(stderr,"BRANCH at offset %d has no operand\n",insp);
      exit(EXIT_FAILURE);
   }
   label=pcode[insp+1];
   if(label<0 || label>=label_count) {
      fprintf(stderr,"BRANCH at offset %d uses invalid label %d\n",insp,label);
      exit(EXIT_FAILURE);
   }
   insp=labels[label];
}

// PRINT_STRING: print the characters stored after the opcode, one per
// int, up to (not including) the terminating 0. On return insp is left
// on the terminator.
static void printString()
{
   int ch;

   for(;;) {
      insp++;
      ch=pcode[insp];
      if(ch==0) {
         break;
      }
      printf("%c",ch);
   }
}

// SET_LABEL: read the label number that follows the opcode and record
// the position of the next instruction (insp+2) under that label.
//
// The p-code may come from an external file, so the operand is checked
// before it is used as an index; a missing operand or a label number
// outside the labels table stops the program with an error instead of
// writing outside the array.
static void setLabel()
{
   const int pcode_len=(int)(sizeof pcode/sizeof pcode[0]);
   const int label_count=(int)(sizeof labels/sizeof labels[0]);
   int label;

   if(insp+1>=pcode_len) {
      fprintf(stderr,"SET_LABEL at offset %d has no operand\n",insp);
      exit(EXIT_FAILURE);
   }
   label=pcode[insp+1];
   if(label<0 || label>=label_count) {
      fprintf(stderr,"SET_LABEL at offset %d uses invalid label %d\n",insp,label);
      exit(EXIT_FAILURE);
   }
   labels[label] = insp+2 ;
}

// END: terminate the program with a success status.
static void end() {
   exit(0);
}

// Run the p-code program in pcode[], starting at index position.
//
// Each iteration decodes the opcode at the virtual instruction pointer
// insp, calls the matching handler and advances insp past the
// instruction. BRANCH sets insp itself. The function does not return:
// the program ends through END, through wannaPlayAgain(), or through
// one of the error exits below.
//
// Because the p-code may be loaded from an external file, two faults
// are reported and stop the program with a failure status:
//   - insp leaving the pcode[] array (for example a program with no END
//     or BRANCH at its tail);
//   - an opcode that is not part of the instruction set, which
//     previously made the loop spin forever on the same position.
void interpret(int position)
{
   const int pcode_len=(int)(sizeof pcode/sizeof pcode[0]);

   insp=position;
   for(;;) {
      if(insp<0 || insp>=pcode_len) {
         fprintf(stderr,
            "interpret: instruction pointer %d is outside the p-code array\n",
            insp);
         exit(EXIT_FAILURE);
      }
      switch( pcode[insp] ) {
         case CLEAR_COUNTER:
            clearCounter();
            insp++;
            break;
         case GEN_RANDOM_NUMBER:
            genRandomNumber();
            insp++;
            break;
         case ASK_HOW_LARGE:
            askHowLarge();
            insp++;
            break;
         case PLAY_THE_GAME:
            playTheGame();
            insp++;
            break;
         case HOW_DID_THE_USER_DO:
            howDidTheUserDo();
            insp++;
            break;
         case WANNA_PLAY_AGAIN:
            wannaPlayAgain();
            insp++;
            break;
         case BRANCH:
            // branch() loads insp with the target; nothing to skip.
            branch();
            break;
         case PRINT_STRING:
            // printString() leaves insp on the 0 terminator; step past it.
            printString();
            insp++;
            break;
         case SET_LABEL:
            // Skip the opcode and its label-number operand.
            setLabel();
            insp=insp+2;
            break;
         case END:
            end();
            break;
         default:
            fprintf(stderr,"interpret: unknown opcode %d at offset %d\n",
               pcode[insp],insp);
            exit(EXIT_FAILURE);
      }
   }
}

Most of the functions have been made static with the exception of the interpret() function. The counterpart header file appears below:

gameinterpreter.h

#ifndef GAME_INTERPRETER_H
#define GAME_INTERPRETER_H

// Virtual opcodes of the mini-interpreter. These are the numbers stored
// in the pcode[] array, so every value is spelled out explicitly.
enum {
   CLEAR_COUNTER       = 0,
   GEN_RANDOM_NUMBER   = 1,
   ASK_HOW_LARGE       = 2,
   PLAY_THE_GAME       = 3,
   HOW_DID_THE_USER_DO = 4,
   WANNA_PLAY_AGAIN    = 5,
   BRANCH              = 6,
   PRINT_STRING        = 7,
   SET_LABEL           = 8,
   END                 = 9
};

// Run the p-code program starting at the given index of pcode[].
void interpret(int);

#endif
//

The enum construct defines 10 symbols, each having a value in the range of 0 through 9. Those numbers will be used as virtual opcodes.

The additional support functions are as follows:

  • printString() – This function displays a string of characters that occurs immediately after the PRINT_STRING opcode until we hit a 0. Each character in the string will occupy one int. That’s certainly not the best way to implement this mechanism, but it will suffice.
  • setLabel() – An integer after the opcode is read and is used to mark the offset of the next instruction for the target of a BRANCH instruction. SET_LABEL,0 would place the position of the next instruction into label 0 in the array labels.
  • branch() – This function reads the next int in the stream, looks up that entry in the label table, and branches to it by manipulating the virtual instruction-pointer insp.
  • end() – Terminate execution.
  • interpret(int) – This function uses a switch-case to interpret our virtual instruction set.

The program game2.c will use the interpreter to implement the registered copy of the game.

#include "gameinterpreter.h"

// P-code program for the registered game. Entries not listed here are
// zero-initialised.
// NOTE(review): CLEAR_COUNTER never appears in this program, so the try
// count is not reset between rounds -- confirm whether it should follow
// SET_LABEL,0.
int pcode[300]={
   // banner text: one character per int, terminated by 0
   PRINT_STRING,'R','e','g','i','s','t','e','r','e','d',' ',
      'v','e','r','s','i','o','n','.','\n', 0,
   ASK_HOW_LARGE,
   // label 0 marks the start of a round
   SET_LABEL,0,
   GEN_RANDOM_NUMBER,
   PLAY_THE_GAME,
   HOW_DID_THE_USER_DO,
   WANNA_PLAY_AGAIN,
   // play another round
   BRANCH,0
} ;

// Run the embedded p-code program from its first instruction.
// The return type is spelled out: implicit int was removed in C99.
int main(void) {
   interpret(0);
   return 0;
}

The actual p-code program is contained in an integer array named “pcode” which contains 300 entries. ( 300 is just an arbitrary number that I used for sake of example. )

   // The registered-version program, repeated from game2.c.
   int pcode[300]={
      // banner text: one character per int, terminated by 0
      PRINT_STRING,'R','e','g','i','s','t','e','r','e','d',' ',
         'v','e','r','s','i','o','n','.','\n', 0,
      ASK_HOW_LARGE,
      // label 0 marks the start of a round
      SET_LABEL,0,
      GEN_RANDOM_NUMBER,
      PLAY_THE_GAME,
      HOW_DID_THE_USER_DO,
      WANNA_PLAY_AGAIN,
      // play another round
      BRANCH,0
   } ;

When the interpret() function begins execution, the PRINT_STRING opcode will be found, causing the printString() function to execute. printString() will continuously display the characters remaining in the pcode array until it reaches an int with the value zero.

ASK_HOW_LARGE causes the like-named function to execute.

SET_LABEL,0 marks the next instruction (GEN_RANDOM_NUMBER) as label number zero. Later in the code when the program encounters BRANCH,0, control will transfer to the GEN_RANDOM_NUMBER entry.

The next several opcodes ( GEN_RANDOM_NUMBER, PLAY_THE_GAME, HOW_DID_THE_USER_DO, WANNA_PLAY_AGAIN ) cause their counterpart functions to be executed.

The BRANCH,0 opcode ( assuming that wannaPlayAgain() hasn’t terminated execution ) will branch back to the GEN_RANDOM_NUMBER section, simulating the never-ending for-loop from GAME1.C.

Creating the Trial Version

game3.c implements the trial version by simply changing the pcode array:

#include "gameinterpreter.h"

// P-code program for the trial game: one round, then END. Entries not
// listed here are zero-initialised.
int pcode[300]={
   // banner text: one character per int, terminated by 0
   PRINT_STRING,'U','n','r','e','g','i','s','t','e','r','e','d',' ',
      't','r','i','a','l',' ','v','e','r','s','i','o','n','.','\n', 0,
   ASK_HOW_LARGE,
   GEN_RANDOM_NUMBER,
   PLAY_THE_GAME,
   HOW_DID_THE_USER_DO,
   // stop after the single round
   END
} ;

// Run the embedded trial p-code program from its first instruction.
// The return type is spelled out: implicit int was removed in C99.
int main(void) {
   interpret(0);
   return 0;
}

The trial version displays “Unregistered trial version.” and allows one iteration of gameplay. The END opcode terminates the program after the user plays one round.

Externalizing the P-Code

game4.c and game5.c write files REG.DAT and TRIAL.DAT respectively. These files are copies of the pcode[] array from the registered and unregistered versions of the game.

game4.c

#include <stdio.h>
#include "gameinterpreter.h"

// P-code program for the registered game; main() below writes it to
// REG.DAT. Entries not listed here are zero-initialised.
// NOTE(review): CLEAR_COUNTER never appears in this program, so the try
// count is not reset between rounds -- confirm whether it should follow
// SET_LABEL,0.
int pcode[300]={
   // banner text: one character per int, terminated by 0
   PRINT_STRING,'R','e','g','i','s','t','e','r','e','d',' ',
      'v','e','r','s','i','o','n','.','\n', 0,
   ASK_HOW_LARGE,
   // label 0 marks the start of a round
   SET_LABEL,0,
   GEN_RANDOM_NUMBER,
   PLAY_THE_GAME,
   HOW_DID_THE_USER_DO,
   WANNA_PLAY_AGAIN,
   // play another round
   BRANCH,0
} ;

// Write the whole pcode[] array to REG.DAT as raw bytes.
//
// The file holds the in-memory representation of the ints, so it can
// only be read back by a build with the same int size and byte order.
//
// Returns 0 on success, or 1 if the file cannot be created or written
// completely.
int main(void) {
   FILE *fp;
   int ok;

   fp=fopen("REG.DAT","wb");
   if(fp==NULL) {
      perror("REG.DAT");
      return 1;
   }
   ok=(fwrite(pcode,1,sizeof(pcode),fp)==sizeof(pcode));
   // fclose() flushes buffered data, so a failure here also means the
   // file is incomplete.
   if(fclose(fp)!=0) {
      ok=0;
   }
   if(!ok) {
      fprintf(stderr,"REG.DAT: write failed\n");
      return 1;
   }
   return 0;
}

game5.c

#include <stdio.h>
#include "gameinterpreter.h"

// P-code script for the trial version of the game.
// main() below writes this whole array to TRIAL.DAT.
// Entries of the 300-element array that are not listed are zero-initialized.
int pcode[300]={
   // PRINT_STRING is followed by the characters to display, one per int,
   // and ends at the first int with the value zero.
   PRINT_STRING,'U','n','r','e','g','i','s','t','e','r','e','d',' ',
      't','r','i','a','l',' ','v','e','r','s','i','o','n','.','\n', 0,
   // The next four opcodes run their like-named functions once each.
   ASK_HOW_LARGE,
   GEN_RANDOM_NUMBER,
   PLAY_THE_GAME,
   HOW_DID_THE_USER_DO,
   // END stops the program after a single round.
   END
} ;

// Write the trial-version p-code to the file TRIAL.DAT.
// The file receives the raw in-memory image of pcode[] (sizeof(pcode)
// bytes), so it is only meaningful to a reader built with the same int
// size and byte order.
// Returns 0 on success, 1 if the file could not be created or written.
int main(void) {
   FILE *fp;
   int ok;

   fp=fopen("TRIAL.DAT","wb");
   if(fp==NULL) {
      perror("TRIAL.DAT");
      return 1;
   }

   ok=(fwrite(pcode,1,sizeof(pcode),fp)==sizeof(pcode));

   // fclose() flushes buffered data, so a failure here is a write failure too.
   if(fclose(fp)!=0)
      ok=0;

   if(!ok) {
      fprintf(stderr,"Error writing TRIAL.DAT\n");
      return 1;
   }
   return 0;
}

game6.c is intended to be the final game runtime in the evolution of these game programs. It accepts one file name command-line parameter when executed. Game6 can be used to execute either the file REG.DAT or the file TRIAL.DAT.

To play the registered version, type the following:

game6 reg.dat

To play the trial version, type the following:

game6 trial.dat

This demonstrates that game6 contains the game engine, while the game-flow is controlled by an oversimplified script that we enter in an array of integers.

Since the array is now external ( and is data ), it can be encrypted. We can also add a CRC or a message-digest to the pcode data to detect tampering.

This source code and post were intended to be used as a model. There are certainly many improvements that can be made to the embedded interpreter. The intent was to show how to externalize control-flow without modifying native code on the fly.

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks


Unless otherwise noted, all code and text entries are Copyright © 2009 by James K. Lawless

→ Leave a CommentCategories: C · Compilers and Interpreters

The Protection Racket

November 13, 2009 · Leave a Comment

If you ask the question in a given forum whether or not copy-protection schemes should be used in software, you’ll get all kinds of answers. I won’t try to tell you what to do, but I will describe my experiences with copy-protection as a software author.

Some of the products I will discuss are available here:

http://www.mailsend-online.com

Others have been retired.

Early Attempts

My first product, Keymac, used no copy-protection. I received very little in the way of compensation, but I really didn’t do well at marketing the product. I had always wondered how many people were actually using it and were not paying for it.

Some time later, I developed MailSend, a command-line SMTP e-mailer. I had decided to leave the early versions of MailSend fully functional just to see what kind of acceptance I received. Early sales were okay. Not great, but I would get a few registrations each month. I decided that it was time to experiment with copy-protection.

I wondered what kind of protection I should use. A lot of my peers were using key files that would enable their products’ features. I opted to go a different route. I didn’t want registered users to have to deal with key files, so I decided to send registered users a fully-functional EXE file. The trial version of MailSend employed a copy-protect feature that allowed the user to send no more than about fifteen e-mails per Windows session. They had to reboot their machines to send fifteen more. I also had a nag message that was attached to each outgoing email. The recipients of the emails received a message that the mail was sent with a trial copy of MailSend.

Unfortunately, I also added the attachments feature to MailSend at this time. My sales rose dramatically. I was never certain if the sales were due to the protection I had added or if the new feature was what sold them on the product.

I did converse via e-mail with one young woman who very openly told me that she had tried every way she could ( she was not an expert at reverse-engineering or anything like that ) to get MailSend to send more than fifteen items per Windows invocation. She finally paid the ten-dollar fee that I was asking for the software at the time.

Other customers had asked me if the nag message and fifteen item limit would go away if they paid. They often seemed oblivious to the fact that I was asking them to pay until they ran into these snags.

In a later version, I changed the trial version message embedded in each e-mail item so that it was encrypted in the EXE. I had finally made the big time; Someone had felt it necessary to provide a crack for the trial version of MailSend so that it would no longer spew forth this message.

In my mind, this approach worked nicely. I applied the same item-limit approach to MailGrab, a command-line POP3 e-mail reader. I don’t believe I’ve ever had any complaints about the limit and am quite happy with the number of registrations I receive for that product.

I used to search Usenet via DejaNews to see if I could drum up sales and happened upon someone who had asked for a crack for one of my versions of MailSend. I responded:


From: Jim Lawless
Date: 2000/03/21
Subject: Re: MAILSEND 6.50

{quote}Does anyone have a crack for MAILSEND 6.50 to get rid of the limitation
{quote}of only being able to send 15 messages????

Hi, Frank.

I'm the author of the MailSend tool you're asking about. While
I'm happy that you find it useful, I'm a little disappointed
that you're looking for a crack for it.

I've tried to keep the pricing reasonable ($10 ). All upgrades
are always free; the people who registered the earliest 1.0x
versions are still receiving free upgrades as I release them.

I'm hoping that if you do find a crack ( or if someone builds
a new one ) that eventually you'll join the ranks of many
happy MailSend users.

Best wishes.

Jim Lawless

I suppose that I should have asked for a reply. I never did receive one. It just amazed me that some people would go to such lengths to avoid paying ten dollars. I know that the world is a big place and depending where Frank was from, ten dollars may as well have been $10,000. If this person was in the U.S. at the time and was able to work, they seemed to be spending more time hunting for a crack than they would have needed to work at a minimum-wage job to pay for a legitimate license.

I pondered this as the topic came up again in the shareware forums that I frequented.

A New Experiment

I created a few supplemental products that I no longer sell:

  • Run and Hide – A program that launches a console app in a hidden window.
  • TSched – Tiny command-line scheduler. I open-sourced this program on the blog without the copy-protection routines.
  • HangUp – Command-line Dial-Up-Networking HangUp utility. I also placed this program into open-source here on the blog without the protection routines.

These were all small C programs that really didn’t take very long to write. I decided to write one protection function that I would include in them all. The new protection function simply wrote the current date to an INI file in the Windows directory in a scrambled form and used that date as a baseline to determine when the product should stop working. ( Usually thirty days from the first use. I later learned from my peers that 30 individual uses would have been a better choice. )

If one simply deleted the INI file, each of the above products would start their timer over again, allowing unlimited use of the product.

This seems like an approach that’s too simple, but it really isn’t. What I had come to conclude is that while some people will pay for your software, some won’t. Those who figured out my scheme and willfully bypassed it were probably lost causes. Others were likely just procrastinators. In my mind, the protection-schemes I employed in my products were there to remind anyone evaluating my product that it’s time to register.

The above protection wasn’t fool-proof. I didn’t like the idea that my apps would need permissions to write to the filesystem. Some users run with low privileges on Windows and may not actually be able to write to INI files. I did not want my products to fail because I needed to include protection. I had hoped that anyone having difficulties would contact me, but no one ever did. I still do not know how many users stopped using the trial versions of the above products because of side-effects of the protection.

Overprotection

Most of my products were geared toward a technical use of some kind. While a number of people had registered MailSend and MailGrab for personal use, a much larger number of my clients used these tools in a business setting. I wanted to see if I could create a product to sell to the general consumer.

I wrote a little application called WallShow that would periodically change the wallpaper to a random JPEG, BMP, or PNG image from a specified directory. I thought that this was a fairly interesting product that would be popular. WallShow probably brought in the fewest number of registrations of all my products.

I wrote my own installer, which one reviewer referred to as “weird” or “unusual” or something like that. I used the protection scheme from above … the INI file with a thirty-day trial period. I also placed a “trial version” message on each image that appeared as wallpaper. I then added an image between the random ones that stated that WallShow was unregistered.

The same reviewer gave WallShow really bad marks because of this. The product itself never really took off, but I don’t think that was a factor of the overprotection I had employed. I like to think that maybe I could have received a few more registrations before I retired the product completely.

Just before doing so, I saw a crack for WallShow that removed all of my protections. I tried the crack myself and ended up recompiling WallShow so that the crack would no longer work. Then, I placed it on my site for a while. Apparently, someone thought it was good enough to use without all of the protection.

The Philosophy

I’ve tried to stick with a philosophy of using copy-protection as a reminder.

The copy-protection that I employ with ScreenKap, my command-line screen capture program, is a simple date built in to the EXE file. The trial version expires on a pre-set calendar date so that no temporary INI files need to be used.

It’s sometimes been a challenge to remember to update that value and re-release the trial version, but it’s the easiest to implement.

I went a step further and encrypted all of the string literals in the program by building a custom preprocessor. The preprocessor would transform each string literal into a series of encrypted hex-bytes in a global data buffer and would replace the literal with a function-call to the decrypt routine in the literal’s place. I wasn’t necessarily trying to remind the procrastinators; a new threat of people ripping off others’ software and reselling it under their own name had arisen. I doubt that ScreenKap had ever fallen into that trap, but it was again an experiment that I wanted to try.

I sell a program called CMD2EXE, which packages batch script files and other supporting files into a single EXE file. The copy-protection scheme I employ with CMD2EXE is that the output EXE will only run on the same day it was generated. This should allow people to test the software thoroughly for as long as they like. They cannot deploy an EXE that has been created with the trial version or it will cease to function in twenty-four hours. This also has been a worthwhile mechanism.

Wrapping it Up

I should state that my philosophy of sending the registered users a fully-functional copy has allowed some pirates to place fully-functional versions of my software on their sites by purchasing a single copy. I’ve talked with some peers about a way around this, but it seems like anything that would be effective might be intrusive for a prospect who would legitimately license my software.

My programs will likely all have some form of reminder and/or mild deterrent in them. I’m not certain what new techniques I might employ, but I do revisit this subject from time to time.

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks


Unless otherwise noted, all code and text entries are Copyright © 2009 by James K. Lawless

→ Leave a CommentCategories: Tale Recursion
Tagged: ,

Understanding TRS-80 CMD Files

November 8, 2009 · 4 Comments

I’ve recently begun to use TRS-80 Model I emulators to recapture some of the programming experiences of my younger days. The emulator I’m currently using under Windows is trs80gp which can be found here:

http://members.shaw.ca/gp2000/trs80gp.html

I invoke the emulator with the command-line parameters -m1 to force Model I emulation and -na for non-authentic display.

While trs80gp does not support any of the Disk-Operating-Systems of yesteryear, it does provide a menu option that will load /CMD executable files and BASIC files. You really can’t save any information back to the Windows filesystem, but that’s okay. For my purposes, I want to write some programs in a cross-assembler or cross-compiler environment and will just load the files to try them out. At some point, I will either look for or will build a utility that will tokenize a text listing to a BASIC listing so that I can create BASIC programs as well.

My initial goal, however, is to write Z-80 code and run it on the emulator.

My first task was to find a cross-assembler for the Z-80 that would run under Windows. The trs80gp site references ZMac which sounds like a great editor, but it doesn’t appear to compile in its current form under Windows.

I found a utility called Pasmo here:

http://www.arrakis.es/~ninsesabe/pasmo/

Like some of the others I tried, Pasmo basically generates a binary machine-image file. I chose Pasmo because of the -d option which shows an assembly listing on the standard output device.

I would like to be able to package up anything I write into a standard /CMD file so that it can be used with other emulators or on the real hardware itself. In order to do that, I was going to have to determine how to convert a machine-image file into a /CMD file.

As a test program, I would use the example routine I published in my post http://jimlawless.wordpress.com/2009/11/07/learning-z-80-assembly-language-on-the-trs-80/. That routine fills the video memory with the all-white character (191) and then returns.

I have several /CMD images of games that I have on cassette and began looking through them. I could see some control-codes and such, but my initial stab at trying to interpret them was not successful. After a little searching on the web, I found a reference to an article from The LDOS Journal volume 1, issue 4. Tim Mann has copies of this issue and others on his site here:

http://www.tim-mann.org/misosys.html

The article that describes the format is in the column Roy’s Technical Corner.

Roy describes the record formats permissible in a /CMD file. The format is not unlike the binary tag-based system used in the TIFF graphical image file format. The first byte one encounters is a record-type byte. The next byte is a length of bytes that will follow … sort of. The remainder of bytes should match the length specified in the length byte. The next record in sequence should be another record ID / length / payload sequence, but that doesn’t seem to hold true either.

I wrote a short C program readcmd.exe that lists each record in a /CMD file. While a number of my /CMD files parsed just fine, some did not.

Roy explains that the 01 record indicates a loadable block of binary data. The length byte in many of my /CMD files was zero, which I correctly assumed would yield a 256-byte block. However some of the /CMD files in my possession use a value of two in the length byte and seem to have a payload bigger than two bytes in length.

The article further explains that each loadable block of data first contains a load-address and states that zero and one are special values that indicate a two-byte load-address will be followed by 254 and 255 bytes of data respectively. The article doesn’t mention the value two, but I assume that since the 01 record will always have a load-address, two bytes will always follow. The values zero, one, and two are then used for machine-images of size 254, 255, and 256 respectively. The value three is a complete mystery to me. I have seen a small block with a length of four and the payload that follows the load-address is four bytes in length. I’ll tinker later and see how the emulators load a record with a length value of three.

I should state that my readcmd program is dependent on the Intel representation of a 16-bit integer ( Least Significant Byte followed by Most Significant Byte ). An unsigned short integer must be 16-bits in width in order for the program below to run correctly.

readcmd.c

// readcmd
// Dump the record information for a TRS-80 /CMD
// executable file
//
// License: MIT / X11
// Copyright (c) 2009 by James K. Lawless
// jimbo@radiks.net http://www.radiks.net/~jimbo
// http://www.mailsend-online.com
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

#include <stdio.h>

// Read exactly `count` bytes from `fp` into `dest`.
// Returns 1 on success, 0 if the file ended (or a read error occurred) first.
static int read_exact(FILE *fp,void *dest,size_t count) {
   return fread(dest,1,count,fp)==count;
}

// List every record found in the TRS-80 /CMD file named by argv[1].
//
// Each record is a type byte, a length byte and a payload:
//   01  load block          (2-byte load address followed by data)
//   02  entry address       (2-byte transfer address; the dump stops here)
//   05  load module header  (payload is skipped)
// Any other type byte is reported with its file offset and ends the dump.
//
// The 16-bit addresses are read straight into an unsigned short, so the
// program relies on a little-endian host whose unsigned short is 16 bits.
//
// Returns 0 after a dump, 1 if no file name was given, the file could not
// be opened, or the file ended in the middle of a record.
int main(int argc,char **argv) {
   FILE *fp;
   unsigned char buff[258];
   unsigned int type;
   unsigned int len;
   unsigned short address=0;
   int truncated=0;

   printf("readcmd v1.0 by Jim Lawless\n");
   printf("http://jimlawless.wordpress.com\n\n");

   if(argc<2) {
      fprintf(stderr,"Usage: readcmd file.cmd\n");
      return 1;
   }

   fp=fopen(argv[1],"rb");
   if(fp==NULL) {
      perror(argv[1]);
      return 1;
   }

   for(;;) {
      // Running out of data between records is the normal way to finish.
      if(!read_exact(fp,buff,1))
         break;
      type=buff[0];

      if(type!=1 && type!=2 && type!=5) {
         printf("Unknown code %u at %lx\n",type,(unsigned long)(ftell(fp)-1L));
         break;
      }

      // Every known record carries a one-byte length after the type byte.
      if(!read_exact(fp,buff,1)) {
         truncated=1;
         break;
      }
      len=buff[0];

      if(type==1) {
         // record type is "load block"
         // compensate for special values 0,1, and 2: they stand for
         // 256, 257 and 258 bytes (load address included).
         if(len<3)
            len+=256;
         // read 16-bit load-address
         if(!read_exact(fp,&address,2)) {
            truncated=1;
            break;
         }
         printf("Reading 01 block, addr %x, length = %u.\n",(unsigned)address,len-2);
         // Skip the data; len-2 is at most 256, which fits in buff.
         if(!read_exact(fp,buff,len-2)) {
            truncated=1;
            break;
         }
      }
      else if(type==2) {
         // record type is "entry address"
         printf("Reading 02 block length = %u.\n",len);
         if(len<2) {
            fprintf(stderr,"02 record is too short to hold an entry address\n");
            truncated=1;
            break;
         }
         // Only two bytes are ever stored in `address`, whatever the length
         // byte claims; reading `len` bytes here would overrun the variable.
         if(!read_exact(fp,&address,2)) {
            truncated=1;
            break;
         }
         printf("Entry point is %d %x\n",(int)address,(unsigned)address);
         // Anything after the 02 record is not interpreted.
         break;
      }
      else {
         // record type is "load module header"
         printf("Reading 05 block length = %u.\n",len);
         if(!read_exact(fp,buff,len)) {
            truncated=1;
            break;
         }
      }
   }

   if(truncated)
      fprintf(stderr,"Malformed or truncated record in %s\n",argv[1]);

   fclose(fp);
   return truncated ? 1 : 0;
}

I found that after the 02 record is encountered, all kinds of garbage data can follow. I assume that most /CMD loaders halt interpretation of the file after the 02 record is encountered. You’ll notice a break out of the main input loop when readcmd encounters this record.

My readcmd program was able to parse through all of the /CMD files in my possession. Now that I have a way to verify the correctness of a /CMD file, it’s time to try and build my own.

I really can’t remember where the first bytes of free memory really start on a Model I. Address 17129 looks to be the spot where BASIC begins, but I’ve never owned a DOS on a Model I, so I don’t know if that address can change.

I noticed that a lot of the games I have begin at address 6C00H, so I chose that as the starting-address for my program.

Here is the slightly modified source code from my prior post:

fill.asm

    ; Fill the 1024 bytes of video memory (3C00H-3FFFH) with character 191.
    ORG     6c00H            ; assemble for load address 6C00H
    LD      HL,3C00H ; 15360 in hex: first byte of video memory
    LD      A, 191           ; the all-white block character
    LD      [HL],A           ; store it in the first screen position
    LD      DE,3C01H         ; destination starts one byte after the source
    LD      BC,1023          ; number of bytes left to fill
    LDIR                     ; copy (HL) to (DE) repeatedly until BC is zero;
                             ; the overlap spreads the first byte forward
    RET

I then assembled it with the command:

pasmo -d fill.asm fill.out

The output from Pasmo is as follows:

		ORG 6C00
6C00:21003C	LD HL, 3C00
6C03:3EBF	LD A, BF
6C05:77		LD (HL), A
6C06:11013C	LD DE, 3C01
6C09:01FF03	LD BC, 03FF
6C0C:EDB0	LDIR
6C0E:C9		RET
Emiting raw binary from 6C00 to 6C0E

My load-module is fifteen bytes in size. I need to create a load-record that accommodates fifteen plus two bytes for the load address. My 01 record will have a length of seventeen ( or 11H ) bytes. Here’s how the 01 record should look in hex:

01 11 00 6C 21 00 3C 3E BF 77 11 01 3C 01 FF 03
ED B0 C9

The total space occupied by the 01 record is nineteen bytes.

I then needed to add a 02 record to state the transfer address of 6C00H:

02 02 00 6C

The total size of the two records is twenty-three (17H) bytes in length.

I manually created the /CMD file using the Windows console DEBUG utility.

debug fill.cmd
e 100  01 11 00 6C 21 00 3C 3E BF 77 11 01 3C 01 FF 03
e 110  ED B0 C9 02 02 00 6C
rcx
17
w
q

I first used readcmd to ascertain that the records looked reasonable:

readcmd fill.cmd

readcmd v1.0 by Jim Lawless
http://jimlawless.wordpress.com

Reading 01 block, addr 6c00, length = 15.
Reading 02 block length = 2.
Entry point is 27648 6c00

I loaded fill.cmd in trs80gp and it correctly filled the screen with whitespace.

I had expected the RET instruction to drop me back into BASIC, but it did not. I’m not sure if that’s an idiosyncrasy of trs80gp or if all emulators and the actual hardware/software will behave in this manner. ( NOTE: after fixing my code to use 1023 bytes to move instead of 4095 as George Phillips, author of trs80gp notes in the comments to the post, I did indeed drop into BASIC, but received a ?SN ERROR message at the prompt. I may try George’s suggestion of JP-ing to 1A19H at a later time. )

In the near future, I will write a program that will convert a larger memory-image file ( such as the one produced by Pasmo ) into a /CMD file.

The source and EXE files for readcmd can be found here, along with the source and /CMD file for the fill program.

http://www.mailsend-online.com/wp/readcmd.zip

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks


Unless otherwise noted, all code and text entries are Copyright © 2009 by James K. Lawless

→ 4 CommentsCategories: ASM · TRS-80
Tagged: ,

Learning Z-80 Assembly Language on the TRS-80

November 7, 2009 · 1 Comment

My first computer was a second-hand TRS-80 Model I with 16K of RAM and a cassette-recorder for auxiliary storage. I was 17 years old when I received this computer in the Fall of 1982. My goal? Like many kids my age at the time, I had intended to write a video game or two and live happily on the riches that would befall me.

The computer was purchased at an auction and came with a fair amount of books on BASIC including many that just contained BASIC games.

My eldest brother had his own TRS-80, so he let me borrow from his vast library of 80 Micro magazines as well.

My first task was to really try to learn BASIC. I did so by typing in programs from the books and by trying to write my own. I had been familiar with BASIC for a couple of years, but only some of the more simple aspects. I had known that if I wanted to write games, I was going to have to code using some mysterious black art known as machine-language. All of the good games from Big Five seemed to be written in machine-language.

As I kept poring through the backlog of issues of 80 Micro, I learned how to use little machine-language subroutines via the USR() function. In order to execute machine-language from BASIC, you had to first reserve some space at the top of BASIC at the MEMORY SIZE? when the computer powered up. ( Note: there are ways of getting around this that I’ll explain in a future post. ) After that, you had to POKE your machine-language routine into reserved memory a byte at a time. Authors usually placed these bytes in a series of DATA statements and used the READ command to read each one in a loop.

After the subroutine was placed in memory, the last task was to point the USR() function to the routine. The address of the routine had to be broken down into two bytes, the least-significant-byte first. I began to grow comfortable with the conversion routine … dividing by 256 to get the high-byte and taking the remainder to get the low-byte.

I also became very comfortable with hexadecimal notation and conversion to and from decimal notation. The only thing I was really lacking at this point, was learning machine-language itself.

I began reading Hardin Brothers’ 80 Micro column The Next Step. This column was a tutorial on machine-language in a sort of cookbook approach. Brothers would present some sort of a short program in assembly-language ( the human-readable syntax that is then assembled into machine-language. )

Most of his programs were very compact and were wonderful to study. He would present the assembly listing for the machine-language routine complete with hex codes by each mnemonic. I found that by converting those hex codes to decimal, I would see the same numbers in the DATA statements for the BASIC loader for the particular routine.

This revelation enabled me to tinker a little with some of the subroutines presented.

One of the machine-language subroutines that was presented in the Radio Shack Level II BASIC manual was a simple routine that would fill the screen with white space. This was done by storing a character with the ASCII code 191 at each location in the machine’s video memory ( located at locations 15360 to 16383 inclusively. )

To see what the program was doing, the BASIC equivalent was easy enough to understand:

As the program ran, each character position on the screen would fill with a white block:

The equivalent assembly language program usually looked something like this:

    ; Fill the 1024 bytes of video memory (3C00H-3FFFH) with character 191.
    ORG     0H               ; origin address given to the assembler
    LD      HL,3C00H ; 15360 in hex: first byte of video memory
    LD      A, 191           ; the all-white block character
    LD      [HL],A           ; store it in the first screen position
    LD      DE,3C01H         ; destination starts one byte after the source
    LD      BC,1023          ; number of bytes left to fill
    LDIR                     ; copy (HL) to (DE) repeatedly until BC is zero;
                             ; the overlap spreads the first byte forward
    RET

The first line is the ORiGin directive that tells the assembler ( the program that translates assembly-language to machine-language ) what address we plan on starting at. This little routine is relocatable; it can be placed anywhere in memory because it does not internally depend on addresses within itself.

The major work is performed by a Z-80 instruction called LDIR ( LoaD-Increment-Repeat ). LDIR is a block-memory move command that begins by taking the byte at the address held in the HL register pair and storing it at the address in the DE register pair. Then, it decrements the BC register pair by 1 and, if BC is not zero, it will increment HL and DE and repeat the load-from-HL / store-at-DE / dec BC operation until BC reaches zero.

This routine uses a trick with overlapping memory locations. Note that HL starts at 3C00H and DE 3C01H. When the second iteration of the LDIR loop commences, the value it reads from 3C01H to store in 3C02H was the same as the one in 3C00H. This trick with the source and destination addresses differing by a byte causes the memory at DE for a length of BC to fill with the first byte specified.

So, this is a quick memory-fill routine.

The assembler’s output of this routine might look like the following:

                ORG 0000
0000:21003C     LD HL, 3C00
0003:3EBF       LD A, BF
0005:77         LD (HL), A
0006:11013C     LD DE, 3C01
0009:01FF03     LD BC, 03FF
000C:EDB0       LDIR
000E:C9         RET

The hex digits to the left of each instruction comprise the instruction and its operands (if any). Note the line LD A, BF. This is the line that loads a character 191 into the A register. I found that I could poke any character I wanted into byte number five of this routine and could fill the screen with that value.

I was slowly making headway to learning assembly language itself as I was beginning to understand that the output of the assembler program was ultimately a binary with a bunch of bytes that the Z-80 processor understood.

As I continued to study examples in 80 Micro, I showed some of these programs to my brother. He brought over a couple of items that he’d gotten at a clearance sale at Radio Shack: The Radio Shack Editor/Assembler on cassette and William Barden Jr’s book “TRS-80 Assembly Language Programming.”

I tried to leap into the middle of the book as I had done when learning BASIC, but failed miserably. I started anew and took it step by step. I finally started getting somewhere.

It took a while, but I was able to separate the pseudo-operation command from the actual Z-80 commands and began to make my own subroutines. I also began to use tricks with the cassette load-module format that I had seen some video games use. I figured out how to auto-start a program ( no need to type in a slash at the SYSTEM prompt), scroll the contents of the screen and blur the video in a manner similar to the effects that began the game Attack Force. I also figured out how to load a message immediately on the screen and that the asterisk that normally flashed during a cassette load could be replaced with a character 191 for a more graphical effect.

By the Summer of 1983, my goal was to finally write this game. Alas, I knew nothing about software design. I would design fragments, but I was used to just sloppily coding something together and cajoling it to work. I really didn’t have a game idea. I was just working on different effects and animations and was trying to stitch them together into a game.

Unfortunately, my cassette recorder ( like many ) was unreliable when saving / loading my games. The EDTASM program required one to assemble to cassette, then reload to test. This process took forever. I had found a way to put my program at a high-enough spot in memory that I luckily found a re-entry point for EDTASM, so that once my code was tested, I didn’t have to reload EDTASM … I just entered SYSTEM and then an address ( that now escapes me ) that would leave me back at EDTASM’s ‘*’ prompt with my source-code intact.

I spent many late summer nights working on that code while my TV was affixed on a then-independent cable channel from Kansas City. I coded while listening to the audio for The Three Stooges followed by a mix of Get Smart, Hogan’s Heroes, and similar syndicated television shows from the 1970’s.

As I toiled away, the TRS-80 video games market crashed. New computers were becoming popular as was the Atari 2600 video game console system. If I had finished a game and had tried to market it through Big Five Software ( see http://www.bigfivesoftware.com), chances are they wouldn’t have taken it. Nor would any big-league publisher likely have taken on any new games for the TRS-80.

However, I didn’t finish a game. The slow development cycle and the inability to save reliably ultimately left me no choice but to move on. I had looked at getting an expansion interface so that I could then buy a disk drive and a Disk Operating System to minimize my turnaround time, but those options were costly. More attractive computers with color video and relatively inexpensive disk drive options started to take over the landscape.

Using my knowledge of Z-80 assembly-language, I taught myself 6502 assembly-language on one of the Apple ][’s at school. In the Summer of 1984, I bought a Commodore 64 and 1541 disk drive. My days of hacking the TRS-80 were over.

…almost.

I amassed quite a bit of knowledge in a very short amount of time about that little computer. I had wanted to publish some of the more esoteric things I’d found in a magazine like 80 Micro, but I never did. I have some TRS-80 emulators and am planning on revisiting some of these subjects on this blog in the near future.

Oh, by the way … if you wish to email me nowadays, you can reach me at:

jim@trs-80.com

How cool is that? ;-)

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks


Unless otherwise noted, all code and text entries are Copyright © 2009 by James K. Lawless

→ 1 CommentCategories: ASM · TRS-80 · Tale Recursion
Tagged: , , ,

My Big Shareware Splash

November 4, 2009 · Leave a Comment

If you peek through the archives of alt.comp.shareware.programmer, you’ll see posts from me in the mid-90’s asking questions from the guys who were actually making money by selling their own software.

In April of 1997, I took the big plunge and unleashed a real product onto the masses by way of the SimTel archive:


Jim Lawless
Apr 23 1997, 1:00 am
Newsgroups: comp.archives.ms-windows.announce
Followup-To: comp.archives.ms-windows.discuss
From: (Jim Lawless)
Date: 1997/04/23
Subject: mailsend.zip – Command-line Internet mailer for Win95/NT

I have uploaded to Simtel.Net:

{URL’s removed}mailsend.zip 109880 bytes

mailsend.zip Command-line Internet mailer for Win95/NT

The Mailsend program is a utility for automating Internet e-mail transfers. Mailsend is run from the Win95 command-line. It allows the user to specify a text file and a recipient. The text file will then be sent to the recipient via Internet mail.

By using mailsend within a batch file, a user can send a single text file to a list of users. This feature makes it a practical tool for automating periodic e-mail transmissions such as electronic magazines or announcements via e-mail.

Shareware. Uploaded by the author.


Humble Beginnings

I was quite energetic about writing and selling my own software in the mid-90’s. Perhaps not as energetic as people who made lots of money, but I put a fair amount of time into the study of people who made money selling products on their own.

In 1997, I happened to be writing a review of the Thompson Automation AWK compiler for an article I had pitched to Dr. Dobbs Journal ( see Examining the TAWK Compiler ). I wrote a small client for the Internet finger protocol to demonstrate the compiler’s versatility. As I was going over the code, I had wondered what it would take to flesh out the finger client a little more to make an SMTP e-mail client.

My first attempt was fruitful. I had a command-line emailer working that would simply send text messages via SMTP mail without a lot of frilly features. I did not initially support attachments or other niceties. I decided to use my new creation, MailSend, as a test to learn more about independent software sales. ( Please note that over the years, a number of command-line emailer products have taken the name MailSend … not just mine. )

I found out how to format the supplementary readme files and metadata files whose origins could be traced back to the download sections of electronic Bulletin-Board Systems. I wrote a license agreement and a liberal support policy. I offered free upgrades for life. That was a huge mistake on my part.

My thinking was that I was going to be constantly evolving the product, so if bugs or defects arose, I would simply ask the client to obtain the free upgrade to see if they could recreate the problem with the newest version. I was concerned about being able to support multiple simultaneous versions.

Another concern I had was that I had written the code in a pretty obscure compiler: TAWK. I had reasoned that the entire command-line compiler and the source code to MailSend fit neatly on one 1.44M floppy disk … so I could carry it around and could recompile it on just about any machine.

After discussing things with the folks on alt.comp.shareware.programmer, I found that many of them took non-traditional approaches to writing their software as well. Some wrote in PowerBASIC, many wrote in Delphi, some in varieties of C/C++. I began to realize that the lone wolf developer needed to leverage whatever they were most comfortable with, as long as the tool itself did not become obsolete.

I priced the software at $10 ( another big mistake ) and made a few announcements here and there on various newsgroups and forums.


The Early Sales

I was very happy when the first checks made their way to my mailbox. I was seeing a nice little stream of money for this product, but it wasn’t anywhere near what the professionals on a.c.s.p. were making with their software.

After accepting only checks via mail for quite some time, I received an e-mail from a corporate prospect who asked me if I could accept payment via credit-card. I couldn’t at the time, but I asked him to let me see if I could find a card processor. My first attempt at signing up for a credit-card processor was very painful. I canceled the membership before any payments were processed.

I then signed up for RegSoft ( now a Digital River company. ) Although I seemed to have a gift for making mistakes along the way, signing up with RegSoft was the best decision I had made regarding MailSend. In 24 hours after signing up, they processed two orders for MailSend. Neither of these was from the gentleman who had asked if he could pay with a credit-card.

The registrations were becoming much more frequent than the checks-in-the-mail had been.

My mentors in a.c.s.p. attributed this to impulse buying; It’s easier to procrastinate on a purchase if it takes work ( such as writing out a check, addressing an envelope, …etc. ) If one can simply fill out a web page and receive a product electronically in a short time, chances are greater that the sale will commence.

Sometime around these years, I also began participating in the Euro-Share mail-list. Lots of good conversations went on there. You might Google Euro-share archive if you’d like to see the posts.

I should note that I also added a limiting factor to the trial version of MailSend at this time. One could only send about ten emails using the trial version. After that, they would have to reboot their machine so that they could send another ten. I’ll discuss protection and nagware in a future post.


Rising Sales

I would still refer to the monthly income I was making as hobby-level money, but it was GOOD hobby-level money. I began to grow my stable of products with some smaller utilities. ( Please see Throwaway Software : HangUp and A Command Line Scheduler. )

I had begun to make two-hundred to three-hundred dollars each month most months in the early days. I made a few deals for large site-license purchases of my products for several thousand dollars each. By the time the year 2000 had rolled around, I felt like a true independent software developer, although I didn’t make nearly enough to quit my day-job.


Pricing

One of the things that I had not planned for was the costs of doing business. My registration processor took a percentage of each sale. I bought software tools and libraries. $10 was too low of a price-point. When customers wanted bulk-purchase deals, I had already cut out most of the cost of the product, so I had no place to go.

Several of my customers told me that I wasn’t charging enough and regularly registered two copies for each one they needed.

Heeding some advice, I raised the price to $20. I placed a blurb on my site about the increase, stating that it would occur a couple of months away. I really didn’t see any sort of attempts to license MailSend before the change.

When I finally did change the price, it was a non-event. Customers paid $20 per copy and didn’t quibble a bit, unless they were interested in bulk-purchase discounts. Pricing the product was definitely something I should have given more consideration.


Upgrades

The biggest mistake I made was that I do not charge for upgrades for MailSend. I recently read a quote from Joel Spolsky who stated that nothing affects the sales of their defect-tracking software FogBugz like a new release with new features. Joel’s company ( Fog Creek ) sees more sales from upgrades when the new features are worth having than they do from any form of advertisement or sales event.

I have great plans for MailSend, but for a while, they took a back seat to paying gigs. When offered the chance to consult for a fee, I opted for the money instead of opting to work on my software.

I am going to have to rethink this policy as I still have many plans for MailSend’s future.


How the Dot-Com Crash Affected Me

A lot of things happened during the dot-com crash that adversely affected my sales. A number of sites that fed me customers went out of business. Tech companies seemed to be more frugal in their spending at that time and the popularity of my niche products began to wane.

In yet another mistake, I had ignored my mentors who recommended to all new software authors to secure a domain-name. Well, I didn’t. I had known the founders of my Internet Service Provider, so I decided to just keep my personal URL as the reference point for my software. The ISP ended up selling. The new ISP added a tilde (~) to my home page URL, so all of the links I had spread out among the various Usenet posts and other places were now broken.

I also believe that it’s much easier to send email in modern times. Most modern programming languages provide some type of SMTP interface in their runtime libraries, so my product isn’t quite as sought-after as it used to be.

Also, I face a lot of competition; there are a number of very capable command-line mailers out there. Some are free of charge. Some authors make their source code available via open-source licensing models.

MailSend and my other utilities still provide me with a passive cash-flow, but they don’t generate the revenue that they once did. I learned a lot and have a lot more to learn as I continue to support these products and forge ahead creating new ones.

My stories about these early programs aren’t over, yet. In a future Tale Recursion post, I’ll discuss the somewhat controversial topic of protection-schemes in trial software and will discuss the various techniques I have used.

You may obtain trial versions of MailSend and my other software at: http://www.mailsend-online.com

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks


Unless otherwise noted, all code and text entries are Copyright © 2009 by James K. Lawless

→ Leave a CommentCategories: Tale Recursion
Tagged: ,

Hiding Batch File Console Windows

November 2, 2009 · Leave a Comment

( Please note: The good netizens of the alt.comp.msdos.batch.nt Usenet newsgroup pointed out a few things relating to this post.

A new API function is available for Windows 2000 and up called GetConsoleWindow() that alleviates some of the caption-changing operations that I perform in the code below.

There are other utilities which can perform the “hide” operation in addition to many other helpful windows operations. One such utility is “cmdow” available here: http://commandline.co.uk/cmdow/ )

I used to sell a command-line utility for Windows called RUN-and-HIDE that would launch a batch file in a hidden window. While a number of other scripts became available to do the same thing, I noted that I hadn’t seen any that would hide the window of an already-running console process from within that process’s executing batch file script.

I performed a cursory search this evening to see if a utility was to be found that could hide a running console process. Not only was I not able to find one, but many posts state firmly that unless you launch the console process, one absolutely can not hide it.

…so, let me show you how… ;-)

Most windows are easy enough to hide via the Windows API ShowWindow() function. You pass ShowWindow() a window-handle and a show-mode and the window may display differently. One option for the mode argument allows the window to be hidden. ( We could just as easily minimize the window, maximize it, move it, …etc. if we have the window-handle ).

The trick to obtaining the console window-handle is to set the caption to a unique string, then call the Windows FindWindow() function. We need a unique title because FindWindow() will yield the handle for the first window it finds that matches our criteria.

My code will call getpid() to obtain the current process ID for the running utility and will use that value, with a prefix of HIDECMD_, as the temporary unique title for the console window.

Here is the code:

hidecmd.c

// hidecmd.c
// Hide the current console window for a running batch
// script.
//
// License: MIT / X11
// Copyright (c) 2009 by James K. Lawless
// jimbo@radiks.net http://www.radiks.net/~jimbo
// http://www.mailsend-online.com
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

#include <windows.h>
#include <stdio.h>
#include <process.h>
#pragma comment(lib,"user32.lib")
// Hide the console window that hosts the calling batch script.
//
// The console window is located by temporarily giving it a caption
// built from this process's ID, looking that caption up with
// FindWindow(), and hiding the window that comes back. The previous
// caption is put back before the program exits.
//
// Returns 0 when the window was found and hidden, 1 otherwise.
int main(int argc,char **argv) {
   int pid;
   int rc=1;
   char wrk[40];
   char old[256];
   HWND h;

   (void)argc;
   (void)argv;

      // get the process ID to use as a unique
      // number
   pid=getpid();
   snprintf(wrk,sizeof(wrk),"HIDECMD_%d",pid);
      // preserve the old console window title; start with an
      // empty string so that a failed call never leaves
      // uninitialised bytes to be written back later
   old[0]='\0';
   GetConsoleTitle(old,sizeof(old));
      // replace it with a unique title
   if(!SetConsoleTitle(wrk)) {
      fprintf(stderr,"hidecmd: unable to change the console title\n");
      return rc;
   }
      // give the system time to change the title
   Sleep(40);

      // now, find the window handle by title
   h=FindWindow(NULL,wrk);
   if(h!=NULL) {
         // hide the window
      ShowWindow(h,SW_HIDE);
      rc=0;
   }
   else {
      fprintf(stderr,"hidecmd: console window not found\n");
   }
      // replace the old title, whether or not the window was found
   SetConsoleTitle(old);
      // wait just a bit for the update again
   Sleep(40);
   return rc;
}

To use hidecmd, place a call to it early in your batch file. You may see the window blink as it initially displays, then disappears.

hidedemo.bat

hidecmd
dir %windir%
exit

I recommend use of the exit verb to ensure that the batch script terminates.

The source and EXE files for hidecmd can be found here.

http://www.mailsend-online.com/wp/hidecmd.zip

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks


Unless otherwise noted, all code and text entries are Copyright © 2009 by James K. Lawless

→ Leave a CommentCategories: C · Nonconformity
Tagged: , ,

My Foray into Shareware

October 27, 2009 · 1 Comment

In the early 90’s, I wanted to be an independent software developer. Heck, I wanted to be an indie in the 80’s, too. I just didn’t ever seem to be able to harmonize software I’d written with any kind of a need in the marketplace.

At this time, I was still in the habit of frequenting electronic bulletin-board systems (BBS’s). Although I had picked up this hobby while the venerable Commodore 64 was my computer of choice back in 1984, I primarily frequented bulletin-board systems running on MS-DOS ( often supplemented with Quarterdeck’s DesqView to support multiple incoming calls. )

A few of the BBS’s I was particularly drawn to were running on a nice little system called the Spitfire BBS written in Turbo Pascal by fellow Iowan Mike Woltz under the company name Buffalo Creek Software. Mike was selling this software independently. The sysops loved it. The users loved it. I was very impressed that someone who probably lived just a couple hours away from me was actually selling his own software independently and was obviously picking up some sales in my neck of the woods.

In addition to the BBS software itself, Spitfire was able to host third-party plug-ins called “doors”. Doors were simply external programs that observed a predefined parameter file format which contained information about the current user and such. The door program could then take over the online experience as an extension to the BBS itself.

One of the non-game-oriented doors available for Spitfire was a message-board add-on called CircuitNet. This was a networking plug-in that provided a series of nationwide, special-interest, moderated forums not unlike FidoNet.

Later, Woltz wrote his own add-on for Spitfire called SFNet which ultimately replaced CircuitNet on the BBS’s I used to frequent.

One of the SFNet nationwide forums was specifically for door program discussion. A number of doors were discussed. Game doors were quite popular. One door author would show up from time to time on the forum and would release some pretty good games as try-before-you-buy shareware. He made what I assume to be a small but nice enough amount of money at it, so I decided to try my hand at writing some software that I felt would be of interest to the Spitfire community.

My first attempt to write and sell my own software using the shareware marketing concept was a program called keymac. Keymac was an MS-DOS TSR ( terminate but stay-resident ) utility that I wrote in assembly language. It ran in the background, intercepting all keystrokes that were gathered via the standard BIOS and DOS API routines and would check for keys that had been defined in a macro file.

The macro file allowed one to cause a single key ( with CTRL, ALT, and SHIFT as modifiers ) to generate multiple virtual keystrokes. One could configure the macro text file so that by pressing F8, the characters for a string such as “Hi, there, friend!” would appear to have been typed at the keyboard.

I wrote this utility because a sysop on one of the Spitfire forums was hoping for some kind of macro facility in the Sysop chat mode so that frequently asked questions and such could be answered with the touch of a single key.

I wrote and tested the program and decided to sell it for three dollars. I thought that I’d be able to sell a couple dozen copies of the software over one of these forums and that really would have been enough sales for me.

I released the utility with no nag-screen nor expiration time-outs nor any limiting features on one of the sysops’ BBS’s and gave them a free copy for hosting it. Then, I announced it on one of the forums.

Another sysop … the one who had wished out loud for a macro feature … tried keymac and thanked me for it … but no payment ever came my way. I was dumbfounded. I had written this utility in some very tricky, very tight code and had gone through some considerable testing to make sure that it worked properly. People were using it but I wasn’t getting rich. I wasn’t getting anything.

One day, I had gotten home from work and was opening my mail when I noticed a personal letter from someone in a distant city that I didn’t know. Sure enough, a check for three dollars and a letter thanking me for the utility were in the envelope.

I ended up calling the guy to thank him and asked him how he used the software. He was just ecstatic about the program. He was a sysop, but he used the software on other computers for his own BBS’ing as a client. He said that he’d found many uses for it outside the realm of telecommunications altogether.

I was very happy. The money wasn’t really the issue. By sending a check, the gentleman had sent me an affirmation that I had written something that he found useful enough to pay me for it. That really made my day.

Inspired by my first sale, I ventured out to write another program for the same community. This time, I decided to write a door program as I thought that by targeting something that the BBS users would want instead of the sysops, I might make more sales.

My door was actually a door utility that ran as part of Spitfire’s scheduled maintenance activities. ( I can’t remember exactly how this worked, but I believe there were batch files that ran nightly that could be customized by the sysops ).

I wrote a little utility that would read the users scores from another door game and would post a message to a specified forum containing the high-score list or something like that. Again, I gave a copy to the sysop for the BBS I visited most often and made an announcement on one of the forums.

This time, I actually think I sold two copies of this utility, again at three dollars each. I was quite happy with that. It didn’t take long to write, except for some work with the sysop to ensure that I was posting the message according to the published data-structures. It took me a bit longer than it might have others because I was writing this utility in C and had to deal with data types specific to Turbo Pascal such as counted-strings/packed-arrays-of-characters; strings with a length byte at the beginning instead of a C-style zero-byte terminator.

I was just happy enough that a couple more people registered in a rather timely manner. However, one of the two had a problem with the program. I ended up spending enough time on a long-distance call to correct the program, that my gross sales for the two copies were eaten.

At that point, I ran out of enthusiasm for publishing much more in this particular genre of software. Soon enough, the Internet became readily available to the public and the BBS’s started to disappear.

From these early experiences, I had found that I needed to strongly consider how to price my products in future attempts at marketing software and also needed to ensure that I had some way to keep my support costs down.

Something that wasn’t quite obvious to me yet was that I was going to have to find a way to ensure that people who had planned to pay but were procrastinating could be nudged into paying in a timely manner.

My next steps into becoming an indie software developer were much more fruitful. I’ll tell you more about those in this Tale Recursion section of my blog soon.

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks


Unless otherwise noted, all code and text entries are Copyright © 2009 by James K. Lawless

→ 1 CommentCategories: MS-DOS · Tale Recursion
Tagged: , , ,

Shrouding CSharp and Java Source Code with AWK

October 25, 2009 · Leave a Comment

I enjoy tinkering with both Java and C# and publish a lot of my source on this blog under a very liberal open-source license.

I have wondered, however, what I would do if I had to protect a commercial program written in either of the two. The obvious choice would be to purchase a professional obfuscation program that defeats most decompilers. Fair enough. Most of these work on the compiled intermediate code.

What, however, if I just wanted to put some mild protection into the compiled product? The mild protection I refer to would still allow the code to be decompiled back to source form, but the person decompiling would have to work hard to make use of the bulk of the code.

My solution was to try a simple shrouding utility.

Shrouding ( also referred to as fogging ) is a kind of source-level obfuscation that was somewhat popular years ago for C programs sold in source form. The programs would generally compile but would be unreadable enough that they would discourage someone from using portions of unlicensed code in other programs.

A lot of these source-level shroud utilities are language-aware and determine what variables, functions, methods, and classes are locally-scoped. The shroud utility then replaces these with generated identifiers.

Most shrouding utilities go a step further and obfuscate literal strings as well as rewriting flow-control constructs.

All I had intended to do was to change the locally-scoped identifier-names to something that my script generates.

My proof-of-concept is an AWK script called foggy.awk. foggy.awk replaces any identifier it finds beginning with two underscore characters with a new identifier containing the prefix “i_” and a sequentially generated number.

This means that in my code, I have to observe a coding convention. I will have to prefix the name of any identifier that should be shrouded with two underscores.

First, here’s the shrouding script:

foggy.awk

# Foggy.awk
# A source-code shrouding utility.
# Replace any "identifier" beginning with __ with a sequentially-
# numbered new identifier
#
# License: MIT / X11
# Copyright (c) 2009 by James K. Lawless
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# Set up the identifier alphabet and the replacement table.
BEGIN {
      # every character that may continue an identifier
   legal="abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_0123456789";
   id_counter=0;
      # a bare pair of underscores is mapped to itself
   id_list["__"]="__";
}

# Return the replacement for the identifier in name. The first time
# a name is seen it is assigned the next sequential "i_<n>" name.
function shroud_name(name) {
   if(!(name in id_list)) {
      id_list[name]="i_" id_counter;
      id_counter++;
   }
   return id_list[name];
}

# Scan each input line one character at a time.
#   state 0 : copying ordinary text
#   state 1 : one underscore seen, not yet emitted
#   state 2 : collecting an identifier that began with two underscores
{
   out="";
   state=0;
   len=length($0);
   for(pos=1;pos<=len;pos++) {
      ch=substr($0,pos,1);
      if(state==2) {
         if(index(legal,ch)>0) {
            id=id ch;
         }
         else {
            out=out shroud_name(id);
               # step back so that the character which ended the
               # identifier is examined again as ordinary text
            pos--;
            state=0;
         }
      }
      else if(state==1) {
         if(ch=="_") {
            id="__";
            state=2;
         }
         else {
               # a lone underscore: emit it with the character after it
            out=out "_" ch;
            state=0;
         }
      }
      else {
         if(ch=="_") {
            state=1;
         }
         else {
            out=out ch;
         }
      }
   }
      # flush whatever was still pending when the line ended
   if(state==1) {
      out=out "_";
   }
   else if(state==2) {
      out=out shroud_name(id);
   }
   printf("%s\n",out);
}

As a side note, I have compiled the above script with the Thompson Automation AWK (TAWK) compiler for Windows into the exe foggy.exe available in the foggy.zip file referenced at the end of this post.

Unfortunately, TAWK is no longer commercially available. I reviewed this compiler years ago in Dr. Dobbs Journal. ( See Examining the TAWK Compiler, DDJ May ‘97 here: http://www.ddj.com/architect/184410193 …)

I also tested with GNU GAWK for Windows.

I would have preferred to use a regular expression with captured groups, but the AWK tools I have do not support those sorts of regex features. This likely would have been a smaller Perl script, but I wanted to try it in AWK. The necessary parser state transitions didn’t seem to be too deep, so I gave it a go.

To use the above script either type:

gawk -f foggy.awk < inputfile > outputfile

...or, if you're using foggy.exe

foggy < inputfile > outputfile

Let's first take a look at a Java program that now contains shroud-ready identifiers. I took the source from MicroHttp1.java from my post here: http://jimlawless.wordpress.com/2009/08/23/tracing-xslt-with-a-tiny-java-web-server/

// MicroHttp1
// A small , specialized web server in Java
//
// License: MIT / X11
// Copyright (c) 2009 by James K. Lawless
// jimbo@radiks.net http://www.radiks.net/~jimbo
// http://www.mailsend-online.com
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

import java.io.*;
import java.net.*;
import java.util.Date;

// Tiny HTTP responder: listens on port 80, echoes each incoming
// request to standard output, and answers every request with a small
// HTML page that shows the current date and time.
public class MicroHttp1Obf {
   public static void main(String[] __args)
      throws IOException {
      ServerSocket __serv;
      Socket __s;
      String __str;
      PrintWriter __pw;
      BufferedReader __br;
      __serv=new ServerSocket(80);
      System.out.println("Micro HTTP Server v 0.1");
      System.out.println();
      // serve clients one at a time, forever
      for(;;){
         __s=__serv.accept();
         __br=new BufferedReader(
            new InputStreamReader(
               __s.getInputStream()));
         // echo the request line by line until the reader
         // reports that it is no longer ready
         for(;;) {
            __str=__br.readLine();
            System.out.println(__str);
            if( ! __br.ready())
               break;
         }
         System.out.println();
         // the second constructor argument requests auto-flush
         __pw=new PrintWriter(
            __s.getOutputStream(), true);

         // status line, content-type header, blank line, HTML body
         __pw.print("HTTP 200 OK\r\n");
         __pw.print("Content-type: text/html\r\n\r\n");
         __pw.print("<html><head /><body>");
         __pw.print("Current date/time " + new Date());
         __pw.print("</body></html>");
         // finished with this client
         __pw.close();
      }
   }
}

Note that I had to change the class definition to what I would be targeting for my output filename, since the main Java class and source filename prefix must match.

There are lots of underscores now in this short script. Here's the shrouded output:

// MicroHttp1
// A small , specialized web server in Java
//
// License: MIT / X11
// Copyright (c) 2009 by James K. Lawless
// jimbo@radiks.net http://www.radiks.net/~jimbo
// http://www.mailsend-online.com
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

import java.io.*;
import java.net.*;
import java.util.Date;

// Minimal single-threaded HTTP server: accepts connections on port 80 one
// at a time, echoes the request text to the console, and answers every
// request with a small HTML page showing the current date and time.
public class MicroHttp1Obf {
   // Runs the accept loop forever. Any I/O error propagates out of main
   // and terminates the server.
   public static void main(String[] i_0)
      throws IOException {
      ServerSocket i_1;
      Socket i_2;
      String i_3;
      PrintWriter i_4;
      BufferedReader i_5;
      i_1=new ServerSocket(80);
      System.out.println("Micro HTTP Server v 0.1");
      System.out.println();
      for(;;){
         i_2=i_1.accept();
         try {
            i_5=new BufferedReader(
               new InputStreamReader(
                  i_2.getInputStream()));
            for(;;) {
               i_3=i_5.readLine();
               // readLine() returns null once the client has closed its
               // side of the connection; there is nothing left to echo.
               if(i_3==null)
                  break;
               System.out.println(i_3);
               // Stop once no further request data is immediately available.
               if( ! i_5.ready())
                  break;
            }
            System.out.println();
            i_4=new PrintWriter(
               i_2.getOutputStream(), true);

            // The status line must carry the protocol version.
            i_4.print("HTTP/1.0 200 OK\r\n");
            i_4.print("Content-type: text/html\r\n\r\n");
            i_4.print("<html><head /><body>");
            i_4.print("Current date/time " + new Date());
            i_4.print("</body></html>");
            i_4.close();
         }
         finally {
            // Release the connection even when the exchange fails part-way.
            i_2.close();
         }
      }
   }
}

There are really only about five shrouded identifiers in this program. I suppose someone who really wanted to work at it could perform global search-and-replace operations to change them into something meaningful as they analyze the code.

Let's try something a little larger that has more local identifiers. For this next test, I took the C# source code from my command-line Twitter client Twimmando. See: http://jimlawless.wordpress.com/2009/05/24/twimmando-a-command-line-twitter-client/

Here's the shroud-ready source:

// Twimmando - a command-line Twitter client

// License: MIT / X11
// Copyright (c) 2009 by James K. Lawless
// jimbo@radiks.net http://www.radiks.net/~jimbo
// http://www.mailsend-online.com
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

using System;
using System.Net;
using System.Text;
using System.IO;
using System.Runtime.Serialization.Formatters.Binary;
using System.Web;

namespace Twimmando
{

   // HTTP request method selected for the call.
   public enum __RequestMethod
   {
      __Get,
      __Post,
      __Head
   }
   // Settings gathered from the command line for a single request.
   class __Config
   {
      // HTTP method to use for the request.
      public __RequestMethod __Method;
      // Resource path appended to "http://twitter.com".
      public string __Uri;
      // Value supplied with the -u option.
      public string __UserName;
      // Value supplied with the -p option.
      public string __Password;
      // Accumulated name=value pairs from -f options, joined with "&".
      public string __PostData;
   }

   // Command-line entry point: parses the options, issues one HTTP request
   // against twitter.com and writes the response to standard output.
   class __Twimmando
   {
      public static string __Version = "Twimmando v1.01 (obfuscated)";

      // Exit code is 0 on success and 1 when no -uri option was given or
      // when any exception was raised while parsing or performing the request.
      public static void Main(string[] __args)
      {
         __Config __config;
         try
         {
            __config=__processCommandLine(__args);
            if(__config.__Uri==null)
               Environment.Exit(1);
            __doHttpRequest(__config);
         }
         catch(Exception e)
         {
            Console.Error.WriteLine(e.ToString());
            Environment.Exit(1);
         }
         Environment.Exit(0);
      }

      // Builds the request settings from the argument list. Option names are
      // matched case-insensitively and unrecognised arguments are ignored.
      // The method defaults to GET and the post data to an empty string.
      // Prints the usage text when no -uri option was supplied.
      // Throws ArgumentException when an option is missing its value(s).
      public static __Config __processCommandLine(string[] __args)
      {
         int __i;
         __Config __config=new __Config();
         __config.__PostData="";
         __config.__Method=__RequestMethod.__Get;

         for(__i=0;__i<__args.Length;__i++) {
            if(string.Compare(__args[__i],"-head",true)==0) {
               __config.__Method=__RequestMethod.__Head;
            }
            else
            if(string.Compare(__args[__i],"-post",true)==0) {
               __config.__Method=__RequestMethod.__Post;
            }
            else
            if(string.Compare(__args[__i],"-uri",true)==0) {
               if(__i+1>=__args.Length)
                  throw new ArgumentException("-uri requires a value");
               __config.__Uri=__args[++__i];
            }
            else
            if(string.Compare(__args[__i],"-u",true)==0) {
               if(__i+1>=__args.Length)
                  throw new ArgumentException("-u requires a value");
               __config.__UserName=__args[++__i];
            }
            else
            if(string.Compare(__args[__i],"-p",true)==0) {
               if(__i+1>=__args.Length)
                  throw new ArgumentException("-p requires a value");
               __config.__Password=__args[++__i];
            }
            else
            if(string.Compare(__args[__i],"-f",true)==0) {
               if(__i+2>=__args.Length)
                  throw new ArgumentException("-f requires a name and a value");
               // Separate successive pairs with "&".
               if(__config.__PostData.Length>0) {
                  __config.__PostData+="&";
               }
               __config.__PostData+=__args[__i+1]+"="+HttpUtility.UrlEncode(__args[__i+2]);
               __i+=2;
            }
         }
         if(__config.__Uri==null) {
            Console.WriteLine(__Twimmando.__Version + "\nby Jim Lawless (@lawlessGuy)");
            Console.WriteLine("Usage:\n\ttwimmando [options]\nWhere options are:");
            Console.WriteLine("   -uri resourceURI   ; such as /statuses/public_timeline.xml");
            Console.WriteLine("   -u username        ; Twitter user name");
            Console.WriteLine("   -p password        ; Twitter password");
            Console.WriteLine("   -head              ; send HTTP HEAD request, default is GET");
            Console.WriteLine("   -post              ; send HTTP POST request, default is GET");
            Console.WriteLine("   -f name value      ; add POST data name/value pair to request");
         }
         return __config;
      }

      // Performs the configured request against http://twitter.com and
      // writes the result to standard output: the response headers for a
      // HEAD request, otherwise the response body.
      public static void __doHttpRequest(__Config __config)
      {

         HttpWebRequest __webRequest;
         HttpWebResponse __webResponse;
         Uri __uri=new Uri("http://twitter.com" + __config.__Uri);
         __webRequest = (HttpWebRequest)HttpWebRequest.Create(__uri);
         // Credentials are attached only when a password was supplied.
         if(__config.__Password!=null) {
            __webRequest.Credentials = new NetworkCredential(__config.__UserName, __config.__Password);
         }

         __webRequest.UserAgent = __Twimmando.__Version;
         if(__config.__Method==__RequestMethod.__Post) {
            __webRequest.Method = "POST";
               // remove Expect header
            __webRequest.ServicePoint.Expect100Continue = false;
            __webRequest.ContentType="application/x-www-form-urlencoded";
            // NOTE(review): assumes the post data is pure ASCII so that the
            // character count equals the number of bytes written - confirm.
            __webRequest.ContentLength = __config.__PostData.Length;
            StreamWriter __sOut=new StreamWriter(__webRequest.GetRequestStream(),System.Text.Encoding.ASCII);
            __sOut.Write(__config.__PostData);
            __sOut.Close();
         }
         else
         if(__config.__Method==__RequestMethod.__Head) {
            __webRequest.Method="HEAD";
         }
         __webResponse = (HttpWebResponse)__webRequest.GetResponse();
         try {
            if(__config.__Method==__RequestMethod.__Head) {
               Console.WriteLine(__webResponse.Headers.ToString());
            }
            else {
               Stream __stream = __webResponse.GetResponseStream();
               StreamReader __streamReader =
                  new StreamReader(__stream, Encoding.ASCII);
               Console.WriteLine(__streamReader.ReadToEnd());
            }
         }
         finally {
            // Release the connection even if reading the response fails.
            __webResponse.Close();
         }
      }
   }
}

Here's the encoded result:

// Twimmando - a command-line Twitter client

// License: MIT / X11
// Copyright (c) 2009 by James K. Lawless
// jimbo@radiks.net http://www.radiks.net/~jimbo
// http://www.mailsend-online.com
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

using System;
using System.Net;
using System.Text;
using System.IO;
using System.Runtime.Serialization.Formatters.Binary;
using System.Web;

namespace Twimmando
{

   // HTTP request method selected for the call.
   public enum i_0
   {
      i_1,
      i_2,
      i_3
   }
   // Settings gathered from the command line for a single request.
   class i_4
   {
      // HTTP method to use for the request.
      public i_0 i_5;
      // Resource path appended to "http://twitter.com".
      public string i_6;
      // Value supplied with the -u option.
      public string i_7;
      // Value supplied with the -p option.
      public string i_8;
      // Accumulated name=value pairs from -f options, joined with "&".
      public string i_9;
   }

   // Command-line entry point: parses the options, issues one HTTP request
   // against twitter.com and writes the response to standard output.
   class i_10
   {
      public static string i_11 = "Twimmando v1.01 (obfuscated)";

      // Exit code is 0 on success and 1 when no -uri option was given or
      // when any exception was raised while parsing or performing the request.
      public static void Main(string[] i_12)
      {
         i_4 i_13;
         try
         {
            i_13=i_14(i_12);
            if(i_13.i_6==null)
               Environment.Exit(1);
            i_15(i_13);
         }
         catch(Exception e)
         {
            Console.Error.WriteLine(e.ToString());
            Environment.Exit(1);
         }
         Environment.Exit(0);
      }

      // Builds the request settings from the argument list. Option names are
      // matched case-insensitively and unrecognised arguments are ignored.
      // The method defaults to GET and the post data to an empty string.
      // Prints the usage text when no -uri option was supplied.
      // Throws ArgumentException when an option is missing its value(s).
      public static i_4 i_14(string[] i_12)
      {
         int i_16;
         i_4 i_13=new i_4();
         i_13.i_9="";
         i_13.i_5=i_0.i_1;

         for(i_16=0;i_16<i_12.Length;i_16++) {
            if(string.Compare(i_12[i_16],"-head",true)==0) {
               i_13.i_5=i_0.i_3;
            }
            else
            if(string.Compare(i_12[i_16],"-post",true)==0) {
               i_13.i_5=i_0.i_2;
            }
            else
            if(string.Compare(i_12[i_16],"-uri",true)==0) {
               if(i_16+1>=i_12.Length)
                  throw new ArgumentException("-uri requires a value");
               i_13.i_6=i_12[++i_16];
            }
            else
            if(string.Compare(i_12[i_16],"-u",true)==0) {
               if(i_16+1>=i_12.Length)
                  throw new ArgumentException("-u requires a value");
               i_13.i_7=i_12[++i_16];
            }
            else
            if(string.Compare(i_12[i_16],"-p",true)==0) {
               if(i_16+1>=i_12.Length)
                  throw new ArgumentException("-p requires a value");
               i_13.i_8=i_12[++i_16];
            }
            else
            if(string.Compare(i_12[i_16],"-f",true)==0) {
               if(i_16+2>=i_12.Length)
                  throw new ArgumentException("-f requires a name and a value");
               // Separate successive pairs with "&".
               if(i_13.i_9.Length>0) {
                  i_13.i_9+="&";
               }
               i_13.i_9+=i_12[i_16+1]+"="+HttpUtility.UrlEncode(i_12[i_16+2]);
               i_16+=2;
            }
         }
         if(i_13.i_6==null) {
            Console.WriteLine(i_10.i_11 + "\nby Jim Lawless (@lawlessGuy)");
            Console.WriteLine("Usage:\n\ttwimmando [options]\nWhere options are:");
            Console.WriteLine("   -uri resourceURI   ; such as /statuses/public_timeline.xml");
            Console.WriteLine("   -u username        ; Twitter user name");
            Console.WriteLine("   -p password        ; Twitter password");
            Console.WriteLine("   -head              ; send HTTP HEAD request, default is GET");
            Console.WriteLine("   -post              ; send HTTP POST request, default is GET");
            Console.WriteLine("   -f name value      ; add POST data name/value pair to request");
         }
         return i_13;
      }

      // Performs the configured request against http://twitter.com and
      // writes the result to standard output: the response headers for a
      // HEAD request, otherwise the response body.
      public static void i_15(i_4 i_13)
      {

         HttpWebRequest i_17;
         HttpWebResponse i_18;
         Uri i_19=new Uri("http://twitter.com" + i_13.i_6);
         i_17 = (HttpWebRequest)HttpWebRequest.Create(i_19);
         // Credentials are attached only when a password was supplied.
         if(i_13.i_8!=null) {
            i_17.Credentials = new NetworkCredential(i_13.i_7, i_13.i_8);
         }

         i_17.UserAgent = i_10.i_11;
         if(i_13.i_5==i_0.i_2) {
            i_17.Method = "POST";
               // remove Expect header
            i_17.ServicePoint.Expect100Continue = false;
            i_17.ContentType="application/x-www-form-urlencoded";
            // NOTE(review): assumes the post data is pure ASCII so that the
            // character count equals the number of bytes written - confirm.
            i_17.ContentLength = i_13.i_9.Length;
            StreamWriter i_20=new StreamWriter(i_17.GetRequestStream(),System.Text.Encoding.ASCII);
            i_20.Write(i_13.i_9);
            i_20.Close();
         }
         else
         if(i_13.i_5==i_0.i_3) {
            i_17.Method="HEAD";
         }
         i_18 = (HttpWebResponse)i_17.GetResponse();
         try {
            if(i_13.i_5==i_0.i_3) {
               Console.WriteLine(i_18.Headers.ToString());
            }
            else {
               Stream i_21 = i_18.GetResponseStream();
               StreamReader i_22 =
                  new StreamReader(i_21, Encoding.ASCII);
               Console.WriteLine(i_22.ReadToEnd());
            }
         }
         finally {
            // Release the connection even if reading the response fails.
            i_18.Close();
         }
      }
   }
}

This shrouded program was much more difficult for me to read, as I used some local class definitions and used more local variables and methods than I had used in the sample Java program.

My conclusion?

Well, if I had to employ this method, I think I could grow into learning to deal with the added noise of the extra underscores in the source.

However, I still think that language-aware shrouding utilities would be much less painful to use.

The source and Windows executable file for foggy.awk can be downloaded in a single archive at:
http://www.mailsend-online.com/wp/foggy.zip

del_icio_us Save to del.icio.us
digg Digg it
reddit Save to Reddit
facebook Share on Facebook
twitter Share on Twitter
aolfav More bookmarks


Unless otherwise noted, all code and text entries are Copyright © 2009 by James K. Lawless

→ Leave a Comment · Categories: AWK / GAWK · CSharp · Java · Nonconformity
Tagged: , , , , ,