[ create a new paste ] login | about

Link: http://codepad.org/A5nu1LoT    [ raw code | fork ]

hurracane - C, pasted on Jul 1:
//Takes two strings and determines if the pattern is part of the text. Returns the position where the substring starts or -1 if pattern isn't a substring.
//Because false is represented by the number 0, we return -1 in case of failure to distinguish between an immediate match (on pos 0) and a pattern that we couldn't find.
isSubStr(pattern, text) {
	pattern = strToLower(pattern); // Convert to lowercase. Assume all mapnames are lowercase.
	shift_function = compute_shift_function(pattern);
	text_length = text.size;
	pattern_length = pattern.size;
	
	//Note: no i++ statement, because the shifting of the index is handeled by our algorithm. Having i++ would shift one character too many, thus skipping one possible solution
	for(i=0;i<text_length;) {
		// We can stop trying to find a match if our pattern couldn't possibly fit in the remaining text anymore.
		if((i + pattern_length) > text_length)
			return -1;
		else {
			compares = 0;
			//Try to match the pattern with the current alignment
			for(j=0;j<pattern_length;j++)
				if(pattern[j] == text[i+j])
					compares++;
				else break;
			//If we reached the end of the pattern, this means we found the correct alignment
			if(compares == pattern_length)
				return i;
			else {
				// Cover the exceptional case where the very last character of the text mismatches. We wouldn't want to read an index outside the bounds of the array.
				if((i + pattern_length) >= text_length)
					return -1;
				i = i + getShift(shift_function, pattern, text[i + pattern_length]);
			}
		}
	}
}
getShift(shift_function, pattern, char) {
	char_ascii = charToAscii(char);
	min_ascii = shift_function[0];
	max_ascii = shift_function[1];
	shift_table = shift_function[2];

	//If the ascii value of our character is covered by the table, look up the value for this character and return it.
	if(char_ascii >= min_ascii && char_ascii <= max_ascii)
		return shift_table[char_ascii - min_ascii];
	//If the ascii value of a character is smaller or larger than the ones covered by our array, return the largest possible shift
	else return pattern.size;
}
compute_shift_function(pattern) {
	min_ascii = charToAscii(pattern[0]);
	max_ascii = min_ascii;
	shift_table = [];
	shift_function = [];
	//Determine the minimum and maximum ascii values
	for(i=0;i<pattern.size;i++) {
		min_ascii = min(min_ascii, charToAscii(pattern[i]));
		max_ascii = max(max_ascii, charToAscii(pattern[i]));
	}
	
	//Fill the array with the pattern length + 1 so this will be the default shift for characters that are not in the pattern
	for(i=0;i<((max_ascii - min_ascii) + 1);i++)
		shift_table[i] = pattern.size + 1;
		
	//Fill the array with the correct shift for characters that do appear in the pattern
	for(i=0;i<pattern.size;i++)
		shift_table[charToAscii(pattern[i]) - min_ascii] = pattern.size - i;
	
	//Make a compound-variable so we can conveniently store the information about the shift function
	shift_function[0] = min_ascii;
	shift_function[1] = max_ascii;
	shift_function[2] = shift_table;
	return shift_function;
}
//Takes two numbers, returns the smallest
min(num1, num2) {
	if(num1 < num2)
		return num1;
	else return num2;
}
//Takes two numbers, returns the greatest
max(num1, num2) {
	if(num1 < num2)
		return num2;
	else return num1;
}
//Takes one character, converts this character to a numerical value according to our "ascii-table" 
charToAscii(char) {
	ascii_table = "-_abcdefghijklmnopqrstuvwxyz"; // Assuming these are all the ascii-characters
	for(i=0;i<ascii_table.size;i++)
		if(ascii_table[i] == char)
			return i;
	return "ERROR; Could not find the character in the ascii table, in charToAscii for char: " + char;
}
//Takes a string, converts this string to all-lowercase characters
strToLower(str) {
	newstr = "";
	for(i=0;i<str.size;i++)
		newstr = newstr + charToLower(str[i]);
	return newstr;
}
//Returns the lowercase version of the uppercase character. If the character is not uppercase, return the character itself
charToLower(char) {
	alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
	for(j=0;j<(alphabet.size - 26);j++)
		if(char == alphabet[j])
			return alphabet[j+26];
	return char;
}


Create a new paste based on this one


Comments: