Sometimes, it may be necessary to expose data in your flat file with the relational data. There might be some situations where there is no option to load that data in a table. But still, you need a way to use this flat file data in your SQL statements.

DB2 provides the concept of a table function that you can write. Here is one C table UDF example that reads a file and returns that data through your SQL statement.

Download sample code ufd.zip

udfreadfile.c

#include <stdio.h>
#include <stdarg.h>
#include <sqludf.h> 

#define  DEBUG 0

struct SCRATCHDATA
{
  long idx;
  FILE *p;
};


void hardw(char *msg, ...)
{
   FILE *p;
   char name[500];
   va_list argp;

   if (DEBUG) 
   {
      strcpy(name, getenv("HOME"));
      if (name == NULL)
      {
         strcpy(name, "/tmp/hard.txt");
      } else
      {
         strcat(name, "/sqllib/hard.txt");
      }
      p = fopen(name, "a");
      if (p == NULL)
      {
         p = fopen("/tmp/hard.txt", "a");
      }
      if (p != NULL)
      {
         va_start(argp, msg);
         vfprintf(p, msg, argp);
         va_end(argp);
         fprintf(p, "\n");
         fflush( p);
         fclose(p);
      } 
   }
} 

void SQL_API_FN  readFile(SQLUDF_VARCHAR *fileName, 
              SQLUDF_VARCHAR *cm15, 
              SQLUDF_NULLIND *fileNameInd, 
              SQLUDF_NULLIND *cm15IND, 
              SQLUDF_TRAIL_ARGS_ALL)
{
  char myData[100];
  long i = 0;
  char errMsg[1000];
  struct SCRATCHDATA *sp;
  sp = (struct SCRATCHDATA *) SQLUDF_SCRAT->data;

  switch (SQLUDF_CALLT)
  {
  case SQLUDF_TF_FIRST:
      hardw("in SQLUDF_TF_FIRST ******");
      if (*fileNameInd == -1)
      {
         strcpy( SQLUDF_STATE, "38777");
         sprintf (errMsg, "Null file name.");
         strcpy( SQLUDF_MSGTX, errMsg);
         break;
      }
      sp->p = fopen(fileName, "r");
      if (sp->p == NULL)
      {
         strcpy( SQLUDF_STATE, "38777");
         sprintf (errMsg, "Given file can not be opened", fileName);
         strcpy( SQLUDF_MSGTX, errMsg);
         hardw("File '%s'can not be opened", fileName);
         break;
      }
      sp->idx = 0;
    case SQLUDF_TF_OPEN:
      hardw("in SQLUDF_TF_OPEN ******");
      break;
    case SQLUDF_TF_FETCH:
      hardw("in SQLUDF_TF_FETCH %ld ******", sp->idx);
      if (fgets(myData, 100, sp->p) == NULL)
      {
         strcpy( SQLUDF_STATE, "02000");
         break;
      }
      myData[strlen(myData)-1] = '\0';
      strcpy(cm15, myData);
      *cm15IND = 0;
      sp->idx++;
      break;
    case SQLUDF_TF_CLOSE:
       hardw("in SQLUDF_TF_CLOSE ******");
       fclose(sp->p);
      break;
    case SQLUDF_TF_FINAL:
      /* close the file */
      hardw("in SQLUDF_TF_FINAL ******");
      sp->idx = 0;
      break;
  }
}

udfreadfile.exp

readFile

How to catalog Table UDF (udfcat)

The catalog definition to register C table UDF in DB2 catalog is given below:

#! /bin/ksh
export ${FENCED:="FENCED NOT THREADSAFE"}
export ${SHLIBNAME:="udfreadfile"}
TOK=$(date +"%y%m%d%H%M%S")
export SHLIBNAME=udfreadfile$TOK
rm -f ~/sqllib/function/udfreadfile*
cp -f udfreadfile ~/sqllib/function/$SHLIBNAME

db2 -tv <<!EOF

CONNECT TO $1;

DROP FUNCTION READ_FILE;

CREATE FUNCTION READ_FILE
(
  FILE_NAME       VARCHAR(128)
)
RETURNS TABLE (c1 varchar(15))
EXTERNAL NAME '${SHLIBNAME}!readFile'
SPECIFIC READFILE
LANGUAGE C
PARAMETER STYLE DB2SQL
${FENCED}
RETURNS NULL ON NULL INPUT
NOT DETERMINISTIC
NO SQL
NO EXTERNAL ACTION
SCRATCHPAD
FINAL CALL
DISALLOW PARALLEL;

The above is a script so make sure you set execute bit on it. i.e. chmod +x udfcat

How to compile?

On your platform, copy bldrtn from your ~/sqllib/sample/c dir to your local directory and use this makefile to compile this program.

DB2PATH=${HOME}/sqllib

BLDRTN=./bldrtn

ALIAS=sample
UID=
PWD=

COPY=cp
ERASE=rm -f

all : \
	srv

srv : \
	rtn

rtn : \
	udfreadfile

clean : 
	$(ERASE) *.o

#****************************************************************************
#		   2h - make cleanall
#****************************************************************************

cleanall : \
	clean
	$(ERASE) udfreadfile
	$(ERASE) $(DB2PATH)/function/udfreadfile

udfreadfile : udfreadfile.c
	$(BLDRTN) udfreadfile $(ALIAS)
	./udfcat $(ALIAS) > udfcat.log

How this UDF works?

The sample code given in this UDF reads a flat file having a column of data. The size of the data is 15 character and file can have as many rows as possible.

The UDF defines a scratchpad memory area where we store the file pointer so that it can be used for each row. When this UDF is executed for each row, the different segment of code are executed in certain order.

For example:

SQLUDF_TF_FIRST is used to initialize variables or open a file pointer. This will be executed only once.
SQLUDF_TF_FETCH is used for each row execution. Remember, there is no while loop to go through the contents of the file since different code segments are used for different events.
Use SQLUDF_TF_CLOSE or SQLUDF_TF_FINAL to free up file pointer or free up memory.

FINAL CALL is used in UDF definition so that DB2 can invoke calls for SQLUDF_TF_FIRST and SQLUDF_TF_FINAL code segments. SCRATCHPAD is used to store file pointer and make sure that the size of the SCRATCHPAD is sufficient enough to hold the structure. The default size is only 100 bytes.

How to invoke the table UDF?

SELECT * FROM TABLE (db2inst1.read_file('$HOME/tableudf/datafilename.txt'))

You can use this UDF in your query to join it with other tables etc or use it in a sub-query.