' Mike Trader 2009 (based on work by Stan Durham)
' This version requires a little more memory (12*nElem bytes)
' Sorting an array of 100k strings of random length (0-255 Chars), PB avg time is 9550 Clks/String, Sidewinder is 2040 Clks/String
' Thats 5x faster WITHOUT the benfit of ASM.
' Can be called with:
' CALL SWSort( VARPTR(b(First)), Last-First ) ' Simplist call
' or with all the options:
' CALL SWSort( VARPTR(b(First)), Last-First, VARPTR(pData(First)), CaseIgnore, Descending ) ' Full Call
'Where:
' First = First element in the ARRAY = LBOUND()
' Last = Last element in the ARRAY = UBOUND()
#If 0
A LONG/QUAD ARRAY sorting algorithm designed FOR:
Handle any length String including Zero length
ASCENDING/DESCENDING order
TAG an ARRAY of pointers
Select FIRST & LAST elements TO SORT
Equivelent speed to PBs SORT ARRAY
Zero or 1+ based Arrays
A generic CALL that is user friendly
High level language compatible (no ASM or PB keywords)
PB does not allow passing arrays BYREF as an argument,
The function would be 2x faster using an array of pointers
rather than pointer offsets ie @pArr[i].
#EndIf
#COMPILE EXE "Sidewinder.exe"
#DIM ALL
#INCLUDE "WIN32API.INC"
GLOBAL hDbg AS LONG
TYPE BucketInfoTYPE ' For Sidewinder
BukNum(255) AS LONG
BktBeg(255) AS LONG
BktEnd(255) AS LONG
LastBktNum AS LONG
LastBktBeg AS LONG
END TYPE
TYPE MyTYPE ' An example UDT to Tag Sort
Rand AS LONG
Num AS LONG
Beg AS LONG
sName AS STRING * 255
END TYPE
'¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤'
SUB time_stamp_count(tick AS QUAD) ' CPU Clock count Charles E V Pegge
'---------------------------'
' ' approx because it is not a serialised instruction
' ' it may execute before or after other instructions
' ' in the pipeline.
! mov ebx,tick ' var address where count is to be stored.
! db &h0f,&h31 ' RDTSC read time-stamp counter into edx:eax hi lo.
! mov [ebx],eax ' save low order 4 bytes.
! mov [ebx+4],edx ' save high order 4 bytes.
'---------------------------'
END SUB
'¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
FUNCTION ReadTextFile( sPathnFile AS STRING, sTxt() AS STRING ) AS LONG ' Zero Based Array
LOCAL hFile, TotRec, Count AS LONG
hFile = FREEFILE ' nElements = LOF(hFile) \ SIZEOF (MyUDT)
OPEN sPathnFile FOR INPUT AS #hFile ' LEN = SIZEOF(UDT(0)) ' path and file in element 2
IF ERR THEN
CLOSE hFile
FUNCTION = -ERR
EXIT FUNCTION
END IF
FILESCAN #hFile, RECORDS TO TotRec
REDIM sTxt(TotRec)
Count = 0
WHILE NOT EOF(hFile)
INCR Count
LINE INPUT #hFile, sTxt(Count)
WEND ' PRINT #hDbg, "TotRec="+STR$(TotRec) + ", Count="+STR$(Count)
CLOSE hFile
FUNCTION = Count
END FUNCTION
'¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤'
SUB SidewinderRecur( BYVAL pArr AS LONG PTR, _ ' Pointer to String array
BYVAL pBuf AS LONG PTR, _ ' Pointer to Long array for string sorting
BYVAL pTag AS LONG PTR, _ ' Pointer to Tagged Pointer array
BYVAL pTuf AS LONG PTR, _ ' Pointer to Long array for Tagged Pointer array sorting
BYVAL pChr AS LONG PTR, _ ' Pointer to Array of character values
BYVAL Pos AS LONG, _ ' Character position in the string
BYVAL First AS LONG, _ ' First element in the String Array to sort
BYVAL Last AS LONG, _ ' Last element in the String Array to sort
BYVAL CsIg AS LONG, _ ' Ignore Case Flag
BYVAL Desc AS LONG ) ' Sort Descending Flag
LOCAL i, bVal AS LONG
LOCAL pByt AS BYTE PTR
LOCAL BI AS BucketInfoTYPE ' Faster than DIM each seperatly
' DIM Arr(First TO Last) AS QUAD AT pArr + 4*First ' PB cannot pass an array of pointers BYREF
' DIM Buf(First TO Last) AS QUAD AT pBuf + 4*First
' DIM Tag(First TO Last) AS DWORD AT pTag + 4*First
' DIM Tuf(First TO Last) AS QUAD AT pTuf + 4*First
' DIM Chr(First TO Last) AS QUAD AT pChr + 4*First
INCR Pos
BI.LastBktBeg = First
BI.LastBktNum = 0
IF Desc THEN ' Descending
'- Count occurence of each bVal
FOR i = First TO Last
pByt = @pArr[i] ' Arr(i)
@pChr[i] = @pByt[Pos] ' PRINT #hDbg, "i=" + STR$(i) + ", bVal=" + STR$(@pChr[i])
IF CsIg AND @pChr[i] > 96 AND @pChr[i] < 123 THEN @pChr[i] = @pChr[i] - 32
INCR BI.BukNum(@pChr[i])
NEXT i
'- Determine the start pos/ Size of each bucket at the current character Pos
FOR bVal = 255 TO 0 STEP -1
IF BI.BukNum(bVal) THEN
BI.BktBeg(bVal) = BI.LastBktBeg + BI.LastBktNum
BI.BktEnd(bVal) = BI.BktBeg(bVal) - 1
BI.LastBktBeg = BI.BktBeg(bVal)
BI.LastBktNum = BI.BukNum(bVal)
END IF
NEXT bVal
ELSE ' Ascending
FOR i = Last TO First STEP -1
pByt = @pArr[i]
@pChr[i] = @pByt[Pos]
IF CsIg AND @pChr[i] > 96 AND @pChr[i] < 123 THEN @pChr[i] = @pChr[i] - 32
INCR BI.BukNum(@pChr[i])
NEXT i
FOR bVal = 0 TO 255
IF BI.BukNum(bVal) THEN
BI.BktBeg(bVal) = BI.LastBktBeg + BI.LastBktNum
BI.BktEnd(bVal) = BI.BktBeg(bVal) - 1
BI.LastBktBeg = BI.BktBeg(bVal)
BI.LastBktNum = BI.BukNum(bVal)
END IF
NEXT bVal
END IF
'- Place each string in proper bucket
FOR i = First TO Last
INCR BI.BktEnd(@pChr[i]) ' Chr(i)
@pBuf[BI.BktEnd(@pChr[i])] = @pArr[i] ' Arr(i)
@pTuf[BI.BktEnd(@pChr[i])] = @pTag[i] ' Tag(i)
NEXT i
'- Copy sorted order to original array
FOR i = First TO Last
@pArr[i] = @pBuf[i] ' Buf(i)
@pTag[i] = @pTuf[i] ' Tuf(i)
NEXT i ' PRINT #hDbg, "Round"+STR$(Pos+1)+" Completed --------" + $CRLF
'- Recursively sort each slot of elements for each Pos
IF Desc THEN ' Descending
FOR bVal = 1 TO 255
IF BI.BukNum(bVal) > 1 THEN CALL SidewinderRecur( pArr, pBuf, pTag, pTuf, pChr, Pos, BI.BktBeg(bVal), BI.BktEnd(bVal), CsIg, Desc )
NEXT bVal
ELSE ' Ascending
FOR bVal = 255 TO 1 STEP - 1
IF BI.BukNum(bVal) > 1 THEN CALL SidewinderRecur( pArr, pBuf, pTag, pTuf, pChr, Pos, BI.BktBeg(bVal), BI.BktEnd(bVal), CsIg, Desc )
NEXT bVal
END IF
END SUB
'¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤'
SUB SWSort( BYVAL pArr AS DWORD PTR, BYVAL nElem AS LONG, _
OPT BYVAL pTag AS DWORD PTR, BYVAL CsIg AS LONG, BYVAL Desc AS LONG )
LOCAL i, First AS LONG
IF pTag = 0 THEN ' Create a Dummy array
DIM Tag(nElem) AS LONG
pTag = VARPTR(Tag(0)) ' ALL Zero Based Arrays from here on
END IF
' DIM Tag(nElem) AS DWORD AT pTag ' PB cannot pass an array of pointers BYREF
' DIM Arr(nElem) AS DWORD AT pArr
IF Desc THEN
FOR i = nElem TO 0 STEP -1
IF @pArr[i] = 0 THEN ' Swap any NULL strings to far left
IF i < nElem THEN SWAP @pArr[i], @pArr[nElem] : SWAP @pTag[i], @pTag[nElem]
DECR nElem
END IF
NEXT i ' PRINT #hDbg, "First=" + STR$(First)
ELSE
FOR i = 0 TO nElem
IF @pArr[i] = 0 THEN ' Swap any NULL strings to far left
IF i > First THEN SWAP @pArr[i], @pArr[First] : SWAP @pTag[i], @pTag[First]
INCR First
END IF
NEXT i ' PRINT #hDbg, "First=" + STR$(First)
END IF
DIM Chr(nElem) AS LONG ' Holds a Character from each string
DIM Buf(nElem) AS LONG ' Buffer for sorting Stings
DIM Tuf(nElem) AS LONG ' Buffer for sorting Tagged Pointers
CALL SidewinderRecur( pArr, VARPTR(Buf(0)), pTag, VARPTR(Tuf(0)), VARPTR(Chr(0)), -1, First, nElem, CsIg, Desc )
END SUB
'¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤'
FUNCTION PBMAIN()
LOCAL i, j, RetVal, CaseIgnore, Descending AS LONG
LOCAL First, Last, TotElem, Count AS LONG
LOCAL shortestString, longestString, lowestChar, highestChar AS LONG
LOCAL cBeg, cEnd AS QUAD ' for time stamp, measuring cpu Clks
LOCAL s AS STRING
LOCAL sTxt() AS STRING
LOCAL a() AS STRING
LOCAL b() AS STRING
hDbg = FREEFILE : OPEN "Sidewinder_Debug.txt" FOR OUTPUT LOCK SHARED AS hDbg
PRINT #hDbg, "Time=" + TIME$ + ", Date=" + DATE$
'=========
First = 1
DIM sTxt(First)
TotElem = ReadTextFile( "UCaseNames.csv", sTxt() ) ' base 1
PRINT #hDbg, "Read "+STR$(TotElem)+" Names"
Last = TotElem
Last = 99000
DIM a(Last) AS STRING '
DIM b(Last) AS STRING '
DIM DataType(Last) AS MyTYPE
DIM pData(Last) AS MyTYPE PTR ' Cannot pass an array of pointers BYVAL with PB
'- Xfer random length strings
' FOR i = First TO Last
'IF i = 4 THEN sTxt(i) = "" ' Test NULL string handling
'IF i = 9 THEN sTxt(i) = ""
' a(i) = sTxt(i)
' b(i) = sTxt(i) '
' DataType(i).sName = sTxt(i)
' DataType(i).Rand = RND(-2147483648, 2147483647)
' DataType(i).Num = i
' pData(i) = VARPTR(DataType(i))
'IF i < 100 THEN PRINT #hDbg, STR$(i) + STR$(@pData(i).Num) + " " + STR$(STRPTR(b(i))) + " >" + TRIM$(@pData(i).sName) + "< "
' NEXT '
'PRINT #hDbg, " --------" + $CRLF
'==========
shortestString = 0 ' shortest random string
longestString = 255 ' longest random string
lowestChar = 32 ' ASC() smallest random character
highestChar = 126 ' ASC() largest random character
'- Make a series of random length strings
FOR i = First TO Last
s = ""
FOR j = First TO RND(shortestString, longestString)
s = s + CHR$(RND(lowestChar, highestChar))
NEXT j
a(i) = s
b(i) = s
DataType(i).sName = s
DataType(i).Rand = RND(-2147483648, 2147483647)
DataType(i).Num = i
pData(i) = VARPTR(DataType(i))
IF i < 100 THEN PRINT #hDbg, STR$(i) + " Len=" + STR$(LEN(TRIM$(@pData(i).sName))) + " pData=" + STR$(pData(i)) + " >" + TRIM$(@pData(i).sName) + "< " + STR$(@pData(i).Num)
NEXT i
'==========
s = ""
time_stamp_count(cBeg) ' measuring CPU Clks. The overhead just for making this call is about 25 clocks
ARRAY SORT a(First) ', DESCEND
time_stamp_count(cEnd) ' measuring CPU Clks. The overhead just for making this call is about 25 clocks
s = s + "ARRAY SORT Clks="+STR$( (cEnd-cBeg)\Last ) + $CRLF
CaseIgnore = 0
Descending = 0
time_stamp_count(cBeg) ' measuring CPU Clks. The overhead just for making this call is about 25 clocks
' CALL SWSort( VARPTR(b(First)), Last-First ) ' Simplist call w/no tag array
CALL SWSort( VARPTR(b(First)), Last-First, VARPTR(pData(First)), CaseIgnore, Descending ) ' Full Call
time_stamp_count(cEnd) ' Measuring CPU Clks. The overhead just for making this call is about 25 clocks
s = s + "SideWinder Clks="+STR$( (cEnd-cBeg)\Last ) + $CRLF + $CRLF
'=========
'- Compare Sort Results
' PRINT #hDbg, ""
' FOR i = First TO Last
' IF TRIM$(a(i)) <> TRIM$(b(i)) THEN
' PRINT #hDbg, STR$(i) + $TAB + ">" +TRIM$(a(i)) + "< >" + TRIM$(b(i)) + "<--- out of order"
' ELSE
' IF i < 100 THEN PRINT #hDbg, STR$(i) + $TAB + a(i)
' END IF
' NEXT i
PRINT #hDbg, ""
FOR i = First TO Last
IF TRIM$(a(i)) <> TRIM$(@pData(i).sName) THEN
PRINT #hDbg, STR$(i)
PRINT #hDbg, ">" +TRIM$(a(i)) + "<"
PRINT #hDbg, ">" +TRIM$(@pData(i).sName) + "<--- Bad Match"
MSGBOX STR$(i)+"Sorted Element out of order" : EXIT FOR
ELSE
IF i < 100 THEN PRINT #hDbg, STR$(i) + $TAB + a(i)
END IF
NEXT i
PRINT #hDbg, s
MSGBOX s,64,STR$(Last-First)+" Random Length STRINGs"
END FUNCTION
'¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤'
Bookmarks