综合

img solarsoft

C#算法--------Boyer-Moore算法

发表于2004/10/20 21:14:00  1702人阅读

顾剑辉(solarsoft)

引言

任何有效的应用程序都要处理文本信息,诸如编辑、排版、信息检索、文档分析、知识发掘、语意识别等,这些均需用到文本串的提取和定位。在很多程序设计语言中都有现成的函数可以调用,为程序设计者提供了极大方便。

对于串查找,Robert S.BoyerJ.Strother Moore 1977年实现了一个非常高效的串查找算法BoyerMoore,使用简单的有限状态自动机FMS控制,在目标串中移动,FSM用一个数组来表示。

算法原理,我在这里不做详细的解说了.朋友们可以找找相关的资料.本人主要是参考Scott Robert Ladd著的Java Algorithms 中的BoyerMoore算法.

本文根据此算法介绍在C#中的实现。

算法测试结果如下图:

具体算法如下:

using System;

 

namespace stringsearch

{

     /// <summary>

     /// 字符串搜索的基本抽象类

     /// </summary>

     public abstract class StringSearchTool

     {

         public enum Search

         {

            NOT_FOUND,

            SEARCH_EXACT,

            SEARCH_CASELESS

         }

        

         protected Search search;

 

        protected String pattern;

        

         public string Pattern

         {

              get

              {

                   return pattern;

              }

              set

              {

                 //大小写暂时无用处

                   if(search==Search.SEARCH_CASELESS)

                   {

                       pattern=value;

                       pattern.ToUpper();

                   }

                   else

                   {

                       pattern=value;

                      

                   }

              }

         }   

 

 

         public StringSearchTool()

         {

              search=Search.SEARCH_CASELESS;

              pattern=null;

         }

 

         public StringSearchTool(string p)

        {

             search=Search.SEARCH_CASELESS;

             pattern=p;

 

 

        }

 

 

       

        public StringSearchTool( string p,Search type)

       {

             search=type;

             pattern=p;

 

       }     

        public int getPatternLength()

       {

             return pattern.Length;

       }

 

         public Search getSearchType()

         {

              return search;

         }

 

         public int find(string target)

         {

          return find(target,0);

         }

 

         public abstract int find(string target, int start);

     }

}

 

BoyerMoore算法

using System;

 

namespace stringsearch

{

     /// <summary>

     ///

     /// </summary>

     public class BoyerMoore : stringsearch.StringSearchTool

     {

         protected int [] delta;

         private  static readonly int DELTA_SIZE=65536;

 

         public BoyerMoore():base()

          {

             

         }

         public BoyerMoore(string p):base(p)

         {

             

         }

         public BoyerMoore(string p,Search type):base(p,type)

         {

             

         }

 

 

         public override int find(string target,int start)

         {

              if((pattern==null)||(start<0))

                   return (int)Search.NOT_FOUND;

 

              String target2;

              //if(search==Search.SEARCH_CASELESS)

              //   target2=target.ToUpper();

              //else

                   target2=target;

 

              int t=start+pattern.Length;

 

              while(t<=target2.Length)

              {

                   int p=pattern.Length;

 

                   while(pattern[p-1]==target2[t-1])

                   {

                       if(p>1)

                       {

                            --p;

                            --t;

                       }

                       else

                       {

                            return t-1;

                       }

                   }

                   t+=delta[(int)target2[t-1]];

              }

              return (int)Search.NOT_FOUND;

         }

 

         public  new string Pattern

         {

               get

              {

                   return base.Pattern;

              }

               set

              {

                  base.Pattern=value;

                  int n;

 

                   delta=new int[DELTA_SIZE];

 

                   for(n=0;n<DELTA_SIZE;++n)

                       delta[n]=pattern.Length;

 

                   for(n=1;n<pattern.Length;++n)

                       delta[(int)pattern[n-1]]=pattern.Length-n;

 

                   delta[(int)pattern[pattern.Length-1]]=1;

 

             }

         }

     }

}

测试代码(部分):

private void button1_Click(object sender, System.EventArgs e)

         {

            String terget=label1.Text;

            String pattern=textBox1.Text;

            BoyerMoore bm=new BoyerMoore();

            bm.Pattern=pattern;

            if (bm.find(terget,0)>0)

            MessageBox.Show(this,"你是不是在找 "+pattern+" ?"+"恭喜你找到了!");

            else

            MessageBox.Show(this,"下面的这段话中没有找到你所查找的文字!");

         }

 

编译已通过,在上面的设计中有一些小的漏洞,朋友可以修改以下.

阅读全文
0 0

相关文章推荐

img
取 消
img