【飞秋】C# 调用
SharpICTCLAS雖說是開源的,但07年以后就沒有人再進行維護,跑搜狗的語料問題不少,就連C#版本的作者也承認有不少問題。
想得到更為準確的分詞結果,還是研究SharpICTCLAS3.0,也就是ICTCLAS 2009版。dll 文件不是C#開發的,所以引入要通過DllImport。
先自己寫了個類
代碼
using System;
using System.Collections.Generic;
using System.Text;
using System.Runtime.InteropServices;
namespace test
{
/// <summary>
/// Blittable record made of eight 32-bit integers laid out back to back
/// (offsets 0 through 28, 32 bytes in total). Arrays of this struct are
/// passed to <c>ICTCLAS30.ParagraphProcessAW</c> to be filled by the native side.
/// </summary>
/// <remarks>
/// NOTE(review): the offsets presumably mirror the native <c>result_t</c> of
/// ICTCLAS30.dll; confirm against the library's header before relying on
/// the meaning of any individual field.
/// </remarks>
[StructLayout(LayoutKind.Explicit)]
public struct result_t
{
    // Bytes 0-3.
    [FieldOffset(0)] public int start;

    // Bytes 4-7.
    [FieldOffset(4)] public int length;

    // Bytes 8-11.
    [FieldOffset(8)] public int sPos;

    // Bytes 12-15.
    [FieldOffset(12)] public int sPosLow;

    // Bytes 16-19.
    [FieldOffset(16)] public int POS_id;

    // Bytes 20-23.
    [FieldOffset(20)] public int word_ID;

    // Bytes 24-27.
    [FieldOffset(24)] public int word_type;

    // Bytes 28-31.
    [FieldOffset(28)] public int weight;
}
/// <summary>
/// P/Invoke bindings for the native <c>ICTCLAS30.dll</c> library.
/// Every member forwards to the export named in its <c>EntryPoint</c>;
/// managed strings are marshalled as ANSI (<c>CharSet.Ansi</c>).
/// </summary>
/// <remarks>
/// NOTE(review): the calling convention is left at the platform default.
/// If the DLL exports cdecl functions, add
/// <c>CallingConvention = CallingConvention.Cdecl</c> to each import — confirm
/// against the native header.
/// </remarks>
class ICTCLAS30
{
    // File name of the native library; resolved through the normal DLL search path.
    const string path = @"ICTCLAS30.dll";

    /// <summary>Calls the native <c>ICTCLAS_Init</c> export.</summary>
    /// <param name="sInitDirPath">Forwarded unchanged to the native side; may be null.</param>
    /// <returns>The native boolean result.</returns>
    // The return value is read as a single byte: a C++ `bool` only defines the
    // low byte of the return register, while the default 4-byte BOOL marshalling
    // would also inspect the undefined upper bytes.
    // NOTE(review): assumes the native return type is C++ `bool` — confirm.
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_Init")]
    [return: MarshalAs(UnmanagedType.I1)]
    public static extern bool Init(String sInitDirPath);

    // Raw import of ICTCLAS_ParagraphProcess. It returns IntPtr on purpose:
    // declaring a String return makes the marshaller free the returned pointer
    // with CoTaskMemFree after copying it, which is only valid for memory that
    // was allocated with CoTaskMemAlloc.
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_ParagraphProcess")]
    private static extern IntPtr ParagraphProcessNative(String sParagraph, int bPOStagged);

    /// <summary>
    /// Calls the native <c>ICTCLAS_ParagraphProcess</c> export and copies the
    /// returned ANSI text into a managed string without freeing the native buffer.
    /// </summary>
    /// <param name="sParagraph">Text forwarded to the native side.</param>
    /// <param name="bPOStagged">Integer flag forwarded unchanged to the native side.</param>
    /// <returns>The copied result text, or null when the native call returns a null pointer.</returns>
    public static String ParagraphProcess(String sParagraph, int bPOStagged)
    {
        IntPtr nativeText = ParagraphProcessNative(sParagraph, bPOStagged);
        return Marshal.PtrToStringAnsi(nativeText);
    }

    /// <summary>Calls the native <c>ICTCLAS_Exit</c> export.</summary>
    /// <returns>The native boolean result.</returns>
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_Exit")]
    [return: MarshalAs(UnmanagedType.I1)]
    public static extern bool Exit();

    /// <summary>Calls the native <c>ICTCLAS_ImportUserDict</c> export.</summary>
    /// <param name="sFilename">File name forwarded to the native side.</param>
    /// <returns>The native integer result.</returns>
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_ImportUserDict")]
    public static extern int ImportUserDict(String sFilename);

    /// <summary>Calls the native <c>ICTCLAS_FileProcess</c> export.</summary>
    /// <param name="sSrcFilename">Source file name forwarded to the native side.</param>
    /// <param name="sDestFilename">Destination file name forwarded to the native side.</param>
    /// <param name="bPOStagged">Integer flag forwarded unchanged to the native side.</param>
    /// <returns>The native boolean result.</returns>
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_FileProcess")]
    [return: MarshalAs(UnmanagedType.I1)]
    public static extern bool FileProcess(String sSrcFilename, String sDestFilename, int bPOStagged);

    /// <summary>Calls the native <c>ICTCLAS_FileProcessEx</c> export.</summary>
    /// <param name="sSrcFilename">Source file name forwarded to the native side.</param>
    /// <param name="sDestFilename">Destination file name forwarded to the native side.</param>
    /// <returns>The native boolean result.</returns>
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_FileProcessEx")]
    [return: MarshalAs(UnmanagedType.I1)]
    public static extern bool FileProcessEx(String sSrcFilename, String sDestFilename);

    /// <summary>Calls the native <c>ICTCLAS_GetParagraphProcessAWordCount</c> export.</summary>
    /// <param name="sParagraph">Text forwarded to the native side.</param>
    /// <returns>The native integer result.</returns>
    // NOTE(review): presumably the returned count sizes the array handed to
    // ParagraphProcessAW — confirm against the library documentation.
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_GetParagraphProcessAWordCount")]
    public static extern int GetParagraphProcessAWordCount(String sParagraph);

    /// <summary>Calls the native <c>ICTCLAS_ParagraphProcessAW</c> export.</summary>
    /// <param name="nCount">Integer forwarded unchanged to the native side.</param>
    /// <param name="result">Caller-allocated array marshalled as an output C array.</param>
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_ParagraphProcessAW")]
    public static extern void ParagraphProcessAW(int nCount, [Out, MarshalAs(UnmanagedType.LPArray)] result_t[] result);

    /// <summary>Calls the native <c>ICTCLAS_AddUserWord</c> export.</summary>
    /// <param name="sWord">Text forwarded to the native side.</param>
    /// <returns>The native integer result.</returns>
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_AddUserWord")]
    public static extern int AddUserWord(String sWord);

    /// <summary>Calls the native <c>ICTCLAS_SaveTheUsrDic</c> export.</summary>
    /// <returns>The native integer result.</returns>
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_SaveTheUsrDic")]
    public static extern int SaveTheUsrDic();

    /// <summary>Calls the native <c>ICTCLAS_DelUsrWord</c> export.</summary>
    /// <param name="sWord">Text forwarded to the native side.</param>
    /// <returns>The native integer result.</returns>
    // Declared public like every other binding; without a modifier it was
    // private and therefore unreachable from callers.
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_DelUsrWord")]
    public static extern int DelUsrWord(String sWord);

    /// <summary>
    /// Parameterless constructor kept for source compatibility; the class has
    /// only static members.
    /// </summary>
    public ICTCLAS30()
    {
    }
}
}
調用:
代碼
// Initialise the native library; passing null leaves the directory argument
// unset. Nothing else may be called when initialisation fails.
if (!ICTCLAS30.Init(null))
{
    System.Console.WriteLine("Init ICTCLAS failed!");
    return;
}
System.Console.WriteLine("Init ICTCLAS Success!");
String pResult;
try
{
    // Process the sample paragraph with the second argument set to 1 and print the result.
    pResult = ICTCLAS30.ParagraphProcess("點擊下載超女紀敏佳深受觀眾喜愛。禽流感爆發在非典之后。", 1);
    System.Console.WriteLine(pResult);
}
finally
{
    // Always pair a successful Init with Exit, even when processing or
    // printing throws.
    ICTCLAS30.Exit();
}
注:
使用的時候把ICTCLAS30.dll,Configure.xml和Data文件夾copy到程序exe運行的位置,否則需要指定他們的位置。
如何把pResult搞成昨天博文里的wordResult格式還是個問題。還需要好好研究……
附上:官方網站的C#調用示例
代碼
using System;
using System.IO;
using System.Runtime.InteropServices;
namespace win_csharp
{
/// <summary>
/// Blittable record made of four 32-bit integers laid out back to back
/// (offsets 0 through 12, 16 bytes in total).
/// </summary>
/// <remarks>
/// NOTE(review): the offsets presumably mirror a native <c>result_t</c>
/// declaration; confirm against the header of the DLL version actually used.
/// </remarks>
[StructLayout(LayoutKind.Explicit)]
public struct result_t
{
    // Bytes 0-3.
    [FieldOffset(0)] public int start;

    // Bytes 4-7.
    [FieldOffset(4)] public int length;

    // Bytes 8-11.
    [FieldOffset(8)] public int POS_id;

    // Bytes 12-15.
    [FieldOffset(12)] public int word_ID;
}
/// <summary>
/// Console sample that initialises the native ICTCLAS30.dll, processes one
/// paragraph, prints the result and shuts the library down.
/// </summary>
/// <remarks>
/// NOTE(review): the calling convention is left at the platform default.
/// If the DLL exports cdecl functions, add
/// <c>CallingConvention = CallingConvention.Cdecl</c> to each import — confirm
/// against the native header.
/// </remarks>
class Class1
{
    // File name of the native library; resolved through the normal DLL search path.
    const string path = @"ICTCLAS30.dll";

    /// <summary>Calls the native <c>ICTCLAS_Init</c> export.</summary>
    /// <param name="sInitDirPath">Forwarded unchanged to the native side; may be null.</param>
    /// <returns>The native boolean result.</returns>
    // The return value is read as a single byte: a C++ `bool` only defines the
    // low byte of the return register, while the default 4-byte BOOL marshalling
    // would also inspect the undefined upper bytes.
    // NOTE(review): assumes the native return type is C++ `bool` — confirm.
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_Init")]
    [return: MarshalAs(UnmanagedType.I1)]
    public static extern bool ICTCLAS_Init(String sInitDirPath);

    // Raw import of ICTCLAS_ParagraphProcess. It returns IntPtr on purpose:
    // declaring a String return makes the marshaller free the returned pointer
    // with CoTaskMemFree after copying it, which is only valid for memory that
    // was allocated with CoTaskMemAlloc.
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_ParagraphProcess")]
    private static extern IntPtr ParagraphProcessNative(String sParagraph, int bPOStagged);

    /// <summary>
    /// Calls the native <c>ICTCLAS_ParagraphProcess</c> export and copies the
    /// returned ANSI text into a managed string without freeing the native buffer.
    /// </summary>
    /// <param name="sParagraph">Text forwarded to the native side.</param>
    /// <param name="bPOStagged">Integer flag forwarded unchanged to the native side.</param>
    /// <returns>The copied result text, or null when the native call returns a null pointer.</returns>
    public static String ICTCLAS_ParagraphProcess(String sParagraph, int bPOStagged)
    {
        IntPtr nativeText = ParagraphProcessNative(sParagraph, bPOStagged);
        return Marshal.PtrToStringAnsi(nativeText);
    }

    /// <summary>Calls the native <c>ICTCLAS_Exit</c> export.</summary>
    /// <returns>The native boolean result.</returns>
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_Exit")]
    [return: MarshalAs(UnmanagedType.I1)]
    public static extern bool ICTCLAS_Exit();

    /// <summary>
    /// The main entry point for the application.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    [STAThread]
    static void Main(string[] args)
    {
        // Nothing else may be called when initialisation fails.
        if (!ICTCLAS_Init(null))
        {
            System.Console.WriteLine("Init ICTCLAS failed!");
            return;
        }
        System.Console.WriteLine("Init ICTCLAS Success!");
        try
        {
            String pResult;
            pResult = ICTCLAS_ParagraphProcess("點擊下載超女紀敏佳深受觀眾喜愛。禽流感爆發在非典之后。", 1);
            System.Console.WriteLine(pResult);
        }
        finally
        {
            // Always pair a successful init with exit, even when processing
            // or printing throws.
            ICTCLAS_Exit();
        }
    }
}
}
總結